Skip to content

Commit 27560f1

Browse files
committed
Fix Unstructured::arbitrary_take_rest_iter for collections of collections
This simply makes it match the behavior of `arbitrary_iter`. I experimented with more approaches, but I couldn't get anything that was better in terms of balancing simplicity of implementation, ease of understanding the algorithm, and generating all expected values. This additionally adds a testing helper function for exhaustively generating certain byte buffers and asserting that we generate the expected arbitrary values from those byte buffers. Fixes #158
1 parent 80d6bfe commit 27560f1

File tree

4 files changed

+140
-24
lines changed

4 files changed

+140
-24
lines changed

Cargo.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ authors = [
1010
"Corey Farwell <coreyf@rwell.org>",
1111
]
1212
categories = ["development-tools::testing"]
13-
edition = "2018"
13+
edition = "2021"
1414
keywords = ["arbitrary", "testing"]
1515
readme = "README.md"
1616
description = "The trait for generating structured data from unstructured data"
@@ -37,3 +37,6 @@ required-features = ["derive"]
3737

3838
[workspace]
3939
members = ["./fuzz"]
40+
41+
[dev-dependencies]
42+
exhaustigen = "0.1.0"

src/lib.rs

Lines changed: 130 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,6 +1161,56 @@ impl<'a> Arbitrary<'a> for IpAddr {
11611161
mod test {
11621162
use super::*;
11631163

1164+
/// Assert that the given expected values are all generated.
1165+
///
1166+
/// Exhaustively enumerates all buffers up to length 10 containing the
1167+
/// following bytes: `0x00`, `0x01`, `0x61` (aka ASCII 'a'), and `0xff`
1168+
fn assert_generates<T>(expected_values: impl IntoIterator<Item = T>)
1169+
where
1170+
T: Clone + std::fmt::Debug + std::hash::Hash + Eq + for<'a> Arbitrary<'a>,
1171+
{
1172+
let expected_values: HashSet<_> = expected_values.into_iter().collect();
1173+
let mut arbitrary_expected = expected_values.clone();
1174+
let mut arbitrary_take_rest_expected = expected_values;
1175+
1176+
let bytes = [0, 1, b'a', 0xff];
1177+
let max_len = 10;
1178+
1179+
let mut buf = Vec::with_capacity(max_len);
1180+
1181+
let mut g = exhaustigen::Gen::new();
1182+
while !g.done() {
1183+
let len = g.gen(max_len);
1184+
1185+
buf.clear();
1186+
buf.extend(
1187+
std::iter::repeat_with(|| {
1188+
let index = g.gen(bytes.len() - 1);
1189+
bytes[index]
1190+
})
1191+
.take(len),
1192+
);
1193+
1194+
let mut u = Unstructured::new(&buf);
1195+
let val = T::arbitrary(&mut u).unwrap();
1196+
arbitrary_expected.remove(&val);
1197+
1198+
let u = Unstructured::new(&buf);
1199+
let val = T::arbitrary_take_rest(u).unwrap();
1200+
arbitrary_take_rest_expected.remove(&val);
1201+
1202+
if arbitrary_expected.is_empty() && arbitrary_take_rest_expected.is_empty() {
1203+
return;
1204+
}
1205+
}
1206+
1207+
panic!(
1208+
"failed to generate all expected values!\n\n\
1209+
T::arbitrary did not generate: {arbitrary_expected:#?}\n\n\
1210+
T::arbitrary_take_rest did not generate {arbitrary_take_rest_expected:#?}"
1211+
)
1212+
}
1213+
11641214
/// Generates an arbitrary `T`, and checks that the result is consistent with the
11651215
/// `size_hint()` reported by `T`.
11661216
fn checked_arbitrary<'a, T: Arbitrary<'a>>(u: &mut Unstructured<'a>) -> Result<T> {
@@ -1231,6 +1281,16 @@ mod test {
12311281
let expected = 1 | (2 << 8) | (3 << 16) | (4 << 24);
12321282
let actual = checked_arbitrary::<i32>(&mut buf).unwrap();
12331283
assert_eq!(expected, actual);
1284+
1285+
assert_generates([
1286+
i32::from_ne_bytes([0, 0, 0, 0]),
1287+
i32::from_ne_bytes([0, 0, 0, 1]),
1288+
i32::from_ne_bytes([0, 0, 1, 0]),
1289+
i32::from_ne_bytes([0, 1, 0, 0]),
1290+
i32::from_ne_bytes([1, 0, 0, 0]),
1291+
i32::from_ne_bytes([1, 1, 1, 1]),
1292+
i32::from_ne_bytes([0xff, 0xff, 0xff, 0xff]),
1293+
]);
12341294
}
12351295

12361296
#[test]
@@ -1251,6 +1311,74 @@ mod test {
12511311
assert_eq!(expected, actual);
12521312
}
12531313

1314+
#[test]
1315+
fn arbitrary_for_vec_u8() {
1316+
assert_generates::<Vec<u8>>([
1317+
vec![],
1318+
vec![0],
1319+
vec![1],
1320+
vec![0, 0],
1321+
vec![0, 1],
1322+
vec![1, 0],
1323+
vec![1, 1],
1324+
vec![0, 0, 0],
1325+
vec![0, 0, 1],
1326+
vec![0, 1, 0],
1327+
vec![0, 1, 1],
1328+
vec![1, 0, 0],
1329+
vec![1, 0, 1],
1330+
vec![1, 1, 0],
1331+
vec![1, 1, 1],
1332+
]);
1333+
}
1334+
1335+
#[test]
1336+
fn arbitrary_for_vec_vec_u8() {
1337+
assert_generates::<Vec<Vec<u8>>>([
1338+
vec![],
1339+
vec![vec![]],
1340+
vec![vec![0]],
1341+
vec![vec![1]],
1342+
vec![vec![0, 1]],
1343+
vec![vec![], vec![]],
1344+
vec![vec![0], vec![]],
1345+
vec![vec![], vec![1]],
1346+
vec![vec![0], vec![1]],
1347+
vec![vec![0, 1], vec![]],
1348+
vec![vec![], vec![1, 0]],
1349+
vec![vec![], vec![], vec![]],
1350+
]);
1351+
}
1352+
1353+
#[test]
1354+
fn arbitrary_for_vec_vec_vec_u8() {
1355+
assert_generates::<Vec<Vec<Vec<u8>>>>([
1356+
vec![],
1357+
vec![vec![]],
1358+
vec![vec![vec![0]]],
1359+
vec![vec![vec![1]]],
1360+
vec![vec![vec![0, 1]]],
1361+
vec![vec![], vec![]],
1362+
vec![vec![], vec![vec![]]],
1363+
vec![vec![vec![]], vec![]],
1364+
vec![vec![vec![]], vec![vec![]]],
1365+
vec![vec![vec![0]], vec![]],
1366+
vec![vec![], vec![vec![1]]],
1367+
vec![vec![vec![0]], vec![vec![1]]],
1368+
vec![vec![vec![0, 1]], vec![]],
1369+
vec![vec![], vec![vec![0, 1]]],
1370+
vec![vec![], vec![], vec![]],
1371+
vec![vec![vec![]], vec![], vec![]],
1372+
vec![vec![], vec![vec![]], vec![]],
1373+
vec![vec![], vec![], vec![vec![]]],
1374+
]);
1375+
}
1376+
1377+
#[test]
1378+
fn arbitrary_for_string() {
1379+
assert_generates::<String>(["".into(), "a".into(), "aa".into(), "aaa".into()]);
1380+
}
1381+
12541382
#[test]
12551383
fn arbitrary_collection() {
12561384
let x = [
@@ -1284,11 +1412,11 @@ mod test {
12841412
);
12851413
assert_eq!(
12861414
checked_arbitrary_take_rest::<Vec<u8>>(Unstructured::new(&x)).unwrap(),
1287-
&[1, 2, 3, 4]
1415+
&[2, 4]
12881416
);
12891417
assert_eq!(
12901418
checked_arbitrary_take_rest::<Vec<u32>>(Unstructured::new(&x)).unwrap(),
1291-
&[0x4030201]
1419+
&[0x040302]
12921420
);
12931421
assert_eq!(
12941422
checked_arbitrary_take_rest::<String>(Unstructured::new(&x)).unwrap(),

src/unstructured.rs

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -620,14 +620,8 @@ impl<'a> Unstructured<'a> {
620620
pub fn arbitrary_take_rest_iter<ElementType: Arbitrary<'a>>(
621621
self,
622622
) -> Result<ArbitraryTakeRestIter<'a, ElementType>> {
623-
let (lower, upper) = ElementType::size_hint(0);
624-
625-
let elem_size = upper.unwrap_or(lower * 2);
626-
let elem_size = std::cmp::max(1, elem_size);
627-
let size = self.len() / elem_size;
628623
Ok(ArbitraryTakeRestIter {
629-
size,
630-
u: Some(self),
624+
u: self,
631625
_marker: PhantomData,
632626
})
633627
}
@@ -735,25 +729,16 @@ impl<'a, 'b, ElementType: Arbitrary<'a>> Iterator for ArbitraryIter<'a, 'b, Elem
735729

736730
/// Utility iterator produced by [`Unstructured::arbitrary_take_rest_iter`]
737731
pub struct ArbitraryTakeRestIter<'a, ElementType> {
738-
u: Option<Unstructured<'a>>,
739-
size: usize,
732+
u: Unstructured<'a>,
740733
_marker: PhantomData<ElementType>,
741734
}
742735

743736
impl<'a, ElementType: Arbitrary<'a>> Iterator for ArbitraryTakeRestIter<'a, ElementType> {
744737
type Item = Result<ElementType>;
745738
fn next(&mut self) -> Option<Result<ElementType>> {
746-
if let Some(mut u) = self.u.take() {
747-
if self.size == 1 {
748-
Some(Arbitrary::arbitrary_take_rest(u))
749-
} else if self.size == 0 {
750-
None
751-
} else {
752-
self.size -= 1;
753-
let ret = Arbitrary::arbitrary(&mut u);
754-
self.u = Some(u);
755-
Some(ret)
756-
}
739+
let keep_going = self.u.arbitrary().unwrap_or(false);
740+
if keep_going {
741+
Some(Arbitrary::arbitrary(&mut self.u))
757742
} else {
758743
None
759744
}

tests/derive.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ fn test_take_rest() {
5959
assert_eq!(s2.1, true);
6060
assert_eq!(s1.2, 0x4030201);
6161
assert_eq!(s2.2, 0x4030201);
62-
assert_eq!(s1.3, vec![0x605, 0x807]);
62+
assert_eq!(s1.3, vec![0x0706]);
6363
assert_eq!(s2.3, "\x05\x06\x07\x08");
6464
}
6565

0 commit comments

Comments
 (0)