Add bindings for custom crossover with example

* Expose `LLVMFuzzerCustomCrossOver` through `fuzz_crossover` macro. * `example_crossover` uses `fuzz_mutator` & `fuzz_crossover`
rust-fuzz · Nov 26, 2023 · 7d4e076 · 7d4e076
1 parent 910a31a
commit 7d4e076
Show file tree

Hide file tree

Showing 10 changed files with 272 additions and 1 deletion.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,7 +4,8 @@ Released YYYY-MM-DD.
 
 ### Added
 
-* TODO (or remove section if none)
+* Bindings to `LLVMFuzzerCustomCrossOver` through the `fuzz_crossover` macro.
+* `example_crossover` using both `fuzz_mutator` and `fuzz_crossover` (adapted from @rigtorp)
 
 ### Changed
 

diff --git a/Cargo.toml b/Cargo.toml
@@ -24,6 +24,7 @@ arbitrary-derive = ["arbitrary/derive"]
 members = [
   "./example/fuzz",
   "./example_arbitrary/fuzz",
+  "./example_crossover/fuzz",
   "./example_mutator/fuzz",
 ]
 

diff --git a/ci/script.sh b/ci/script.sh
@@ -30,4 +30,10 @@ cargo fuzz build  --dev
 (! cargo fuzz run boom -- -runs=10000000)
 popd
 
+pushd ./example_crossover
+cargo fuzz build
+cargo fuzz build  --dev
+(! cargo fuzz run --release boom -- -runs=10000000)
+popd
+
 echo "All good!"
diff --git a/example_crossover/Cargo.toml b/example_crossover/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "example_crossover"
+version = "0.1.0"
+authors = ["R. Elliott Childre"]
+edition = "2021"
+
+[target.'cfg(fuzzing)'.dependencies]
+rand = "0.8"
diff --git a/example_crossover/README.md b/example_crossover/README.md
@@ -0,0 +1,29 @@
+# A Custom Crossover Example
+
+This example is a reimplementation of [Erik Rigtorp's floating point summation fuzzing example][1]
+using the Rust bindings for LibFuzzer provided by this crate. In particular, Erik uses both a custom
+mutator and a custom crossover function, which makes for a well-documented, complex code example.
+
+This is mostly a one-to-one rewrite of the C++ code in the blog post, with the big difference
+being the method of converting the raw bytes that are exposed to the custom functions into the
+decoded double-precision floating-point values. Where in C++ we can simply do:
+
+```c++
+uint8_t *Data = ...;
+size_t Size = ...;
+double *begin = (double *)Data;
+double *end = (double *)Data + Size / sizeof(double);
+```
+
+In Rust, however, the task seems a bit more complex due to strictness on alignment:
+
+* [Rust, how to slice into a byte array as if it were a float array? - Stack Overflow][2]
+* [Re-interpret slice of bytes (e.g. [u8]) as slice of [f32] - help - The Rust Programming Language Forum][3]
+* [How to transmute a u8 buffer to struct in Rust? - Stack Overflow][4]
+
+So the casts of `Data` in C++ become calls to `slice::align_to{_mut}` in Rust.
+
+[1]: https://rigtorp.se/fuzzing-floating-point-code/
+[2]: https://stackoverflow.com/a/73174764
+[3]: https://users.rust-lang.org/t/re-interpret-slice-of-bytes-e-g-u8-as-slice-of-f32/34551
+[4]: https://stackoverflow.com/a/59292352
diff --git a/example_crossover/fuzz/Cargo.toml b/example_crossover/fuzz/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "example_crossover_fuzz"
+version = "0.1.0"
+authors = ["R. Elliott Childre"]
+edition = "2021"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+rand = "0.8"
+libfuzzer-sys = { path = "../.." }
+example_crossover = { path = ".." }
+
+[[bin]]
+name = "boom"
+path = "fuzz_targets/boom.rs"
diff --git a/example_crossover/fuzz/artifacts/boom/crash-6f37ac2111469380f475f6c307a915f5917fed6b b/example_crossover/fuzz/artifacts/boom/crash-6f37ac2111469380f475f6c307a915f5917fed6b
diff --git a/example_crossover/fuzz/fuzz_targets/boom.rs b/example_crossover/fuzz/fuzz_targets/boom.rs
@@ -0,0 +1,139 @@
+#![no_main]
+
+use example_crossover::sum;
+use libfuzzer_sys::{fuzz_crossover, fuzz_mutator, fuzz_target};
+use rand::distributions::{Bernoulli, Distribution, Uniform};
+use rand::rngs::StdRng;
+use rand::seq::SliceRandom;
+use rand::SeedableRng;
+use std::mem::size_of;
+
+// Fuzz target: reinterpret the aligned part of the input as `f64`s, add them
+// up with `sum`, and fail if the result is NaN.
+fuzz_target!(|data: &[u8]| {
+    // SAFETY: every bit pattern is a valid `f64`, so viewing the aligned
+    // middle of a byte slice as floats is sound. Unaligned leading and
+    // trailing bytes are ignored.
+    let (_, floats, _) = unsafe { data.align_to::<f64>() };
+
+    let total = sum(floats);
+
+    if total.is_nan() {
+        panic!("The sum of the following floats caused a crash: {floats:?}");
+    }
+});
+
+/// Produce a "random floating point" value that is heavily biased towards
+/// special values.
+///
+/// One of eleven cases is picked uniformly with `rng`: NaN, the smallest
+/// positive normal value, the largest finite value, the negations of those
+/// two, plus/minus machine epsilon, plus/minus infinity, zero, or a value
+/// drawn uniformly from `[-1.0, 1.0]`.
+fn rfp(rng: &mut StdRng) -> f64 {
+    match Uniform::new_inclusive(0, 10).sample(rng) {
+        0 => f64::NAN,
+        // Note: `f64::MIN` is the most negative finite value (i.e. it equals
+        // `-f64::MAX`), which would merely duplicate the `MAX` cases below.
+        // The counterpart of C++'s `numeric_limits<double>::min()` is the
+        // smallest positive normal value, `f64::MIN_POSITIVE`.
+        1 => f64::MIN_POSITIVE,
+        2 => f64::MAX,
+        3 => -f64::MIN_POSITIVE,
+        4 => -f64::MAX,
+        5 => f64::EPSILON,
+        6 => -f64::EPSILON,
+        7 => f64::INFINITY,
+        8 => f64::NEG_INFINITY,
+        9 => 0.0,
+        10 => Uniform::new_inclusive(-1.0, 1.0).sample(rng),
+        // The distribution above only yields 0..=10; fall back to zero to
+        // keep the match exhaustive.
+        _ => 0.0,
+    }
+}
+
+// Custom mutator: treats the input as a sequence of `f64`s and applies one of
+// four mutations, chosen with an RNG seeded from `seed` so that the same seed
+// always produces the same mutation. Evaluates to the new input size in bytes.
+fuzz_mutator!(|data: &mut [u8], size: usize, max_size: usize, seed: u32| {
+    let mut gen = StdRng::seed_from_u64(seed.into());
+
+    match Uniform::new_inclusive(0, 3).sample(&mut gen) {
+        0 => {
+            // Change [an] element
+
+            // Not altering the size, so decode the intended space (i.e. `size`) as floats
+            //
+            // SAFETY: every bit pattern is a valid `f64`, so viewing the
+            // aligned part of the bytes as floats is sound.
+            let (_, floats, _) = unsafe { data[..size].align_to_mut::<f64>() };
+
+            if !floats.is_empty() {
+                let d = Uniform::new(0, floats.len());
+                floats[d.sample(&mut gen)] = rfp(&mut gen);
+            }
+        }
+        1 => {
+            // Add element [to the end]
+            let plus_one = size + size_of::<f64>();
+            if plus_one <= max_size {
+                // Adding 1, f64 to the size, so decode the intended space (i.e.
+                // `size`) plus one more (since we just checked it will fit) as floats
+                //
+                // SAFETY: every bit pattern is a valid `f64`, so viewing the
+                // aligned part of the bytes as floats is sound.
+                let (_, floats, _) = unsafe { data[..plus_one].align_to_mut::<f64>() };
+
+                // `align_to_mut` may yield no floats at all when the buffer is
+                // not suitably aligned, so do not unwrap: a panic here would be
+                // reported as a crash of the code under test. Only grow the
+                // input when a float was actually written.
+                if let Some(last) = floats.last_mut() {
+                    *last = rfp(&mut gen);
+
+                    return plus_one;
+                }
+            }
+        }
+        2 => {
+            // Delete [the end] element
+
+            // Attempting to shrink the size by 1, f64, so decode the intended
+            // space (i.e. `size`) as floats and see if we have any
+            //
+            // SAFETY: every bit pattern is a valid `f64`, so viewing the
+            // aligned part of the bytes as floats is sound.
+            let (_, floats, _) = unsafe { data[..size].align_to::<f64>() };
+
+            // A non-empty float view implies `size >= size_of::<f64>()`, so
+            // the subtraction cannot underflow.
+            if !floats.is_empty() {
+                return size - size_of::<f64>();
+            }
+        }
+        3 => {
+            // Shuffle elements
+
+            // Not altering the size, so decode the intended space (i.e. `size`) as floats
+            //
+            // SAFETY: every bit pattern is a valid `f64`, so viewing the
+            // aligned part of the bytes as floats is sound.
+            let (_, floats, _) = unsafe { data[..size].align_to_mut::<f64>() };
+            floats.shuffle(&mut gen);
+        }
+        _ => unreachable!(),
+    };
+
+    // No size-changing mutation was applied.
+    size
+});
+
+// Custom crossover: builds a new input in `out` by picking, float by float,
+// the element from either `data1` or `data2` with equal probability. The RNG
+// is seeded from `seed`, so the same seed always produces the same result.
+// Evaluates to the number of bytes of `out` that make up the new input.
+fuzz_crossover!(|data1: &[u8],
+                 size1: usize,
+                 data2: &[u8],
+                 size2: usize,
+                 out: &mut [u8],
+                 max_out_size: usize,
+                 seed: u32| {
+    let mut gen = StdRng::seed_from_u64(seed.into());
+
+    let bd = Bernoulli::new(0.5).expect("0.5 is a valid probability");
+
+    // The number of bytes that every source and the destination can provide.
+    // This must stay a byte count (not a float count) because it is used to
+    // slice the byte buffers below; `align_to` discards any trailing bytes
+    // that do not make up a whole float.
+    let min_bytes = size1.min(size2).min(max_out_size);
+
+    // decode each sources to see how many floats we can pull with proper
+    // alignment, and destination as to how many will fit with proper alignment
+    //
+    // Keep track of the unaligned prefix to `out`, as we will need to remember
+    // that those bytes will remain prepended to the actual floats that we
+    // write into the out buffer.
+    //
+    // SAFETY (all three): every bit pattern is a valid `f64`, so viewing the
+    // aligned part of the bytes as floats is sound.
+    let (out_pref, out_floats, _) = unsafe { out[..min_bytes].align_to_mut::<f64>() };
+    let (_, d1_floats, _) = unsafe { data1[..min_bytes].align_to::<f64>() };
+    let (_, d2_floats, _) = unsafe { data2[..min_bytes].align_to::<f64>() };
+
+    // Given that the sources and destinations may have drastically fewer
+    // available aligned floats than decoding allows for; again see which has
+    // the smallest number
+    let min_elems = out_floats.len().min(d1_floats.len()).min(d2_floats.len());
+
+    // Put into the destination, floats from either data1 or data2 if the
+    // Bernoulli distribution succeeds or fails. `zip` stops at the shortest
+    // of the three views, i.e. after exactly `min_elems` elements.
+    for ((dst, &from1), &from2) in out_floats
+        .iter_mut()
+        .zip(d1_floats.iter())
+        .zip(d2_floats.iter())
+    {
+        *dst = if bd.sample(&mut gen) { from1 } else { from2 };
+    }
+
+    // Now that we have written the true floats, report back to the fuzzing
+    // engine that we left the unaligned `out` prefix bytes at the beginning of
+    // `out` and also then the minimum floats that we could write into the
+    // aligned float section.
+    out_pref.len() + min_elems * size_of::<f64>()
+});
diff --git a/example_crossover/src/lib.rs b/example_crossover/src/lib.rs
@@ -0,0 +1,5 @@
+/// Add up `floats` from left to right, starting from `0.0`.
+///
+/// Elements that are NaN are skipped. Only the inputs are checked, not the
+/// intermediate results of the additions.
+pub fn sum(floats: &[f64]) -> f64 {
+    let mut total = 0.0;
+    for &value in floats {
+        if !value.is_nan() {
+            total += value;
+        }
+    }
+    total
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -538,3 +538,68 @@ pub fn fuzzer_mutate(data: &mut [u8], size: usize, max_size: usize) -> usize {
     assert!(new_size <= data.len());
     new_size
 }
+
+/// Define a custom cross-over function to combine test cases.
+///
+/// This is optional, and libFuzzer will use its own, default cross-over strategy
+/// if this is not provided. (As of the time of writing, this default strategy
+/// takes alternating byte sequences from the two test cases to construct the
+/// new one.)
+///
+/// This could potentially be useful if your input is, for instance, a
+/// sequence of fixed-size, multi-byte values and the crossover could then
+/// merge a discrete number of values rather than joining parts of a value.
+///
+/// The macro takes a closure-like expression with exactly these parameters:
+///
+/// * `data1: &[u8]`, `size1: usize`: the first test case and its length
+///   (`data1.len() == size1`).
+/// * `data2: &[u8]`, `size2: usize`: the second test case and its length
+///   (`data2.len() == size2`).
+/// * `out: &mut [u8]`, `max_out_size: usize`: the buffer to write the new
+///   test case into and its length (`out.len() == max_out_size`).
+/// * `seed: u32`: the seed supplied by libFuzzer. Derive all randomness from
+///   it so that the same seed produces the same result.
+///
+/// The body must evaluate to a `usize`: the size of the new test case written
+/// to `out`, which must not be greater than `max_out_size`.
+#[macro_export]
+macro_rules! fuzz_crossover {
+    (
+        |
+        $data1:ident : &[u8] ,
+        $size1:ident : usize ,
+        $data2:ident : &[u8] ,
+        $size2:ident : usize ,
+        $out:ident : &mut [u8] ,
+        $max_out_size:ident : usize ,
+        $seed:ident : u32 $(,)*
+        |
+        $body:block
+    ) => {
+        /// Auto-generated function. Do not use; only for LibFuzzer's
+        /// consumption.
+        ///
+        /// libFuzzer calls this symbol through the C ABI, hence `extern "C"`.
+        #[export_name = "LLVMFuzzerCustomCrossOver"]
+        #[doc(hidden)]
+        pub unsafe extern "C" fn rust_fuzzer_custom_crossover(
+            $data1: *const u8,
+            $size1: usize,
+            $data2: *const u8,
+            $size2: usize,
+            $out: *mut u8,
+            $max_out_size: usize,
+            $seed: std::os::raw::c_uint,
+        ) -> usize {
+            // SAFETY: the caller (libFuzzer) must pass pointers that are valid
+            // for the given number of bytes, and `out` must not overlap the
+            // two inputs.
+            let $data1: &[u8] = std::slice::from_raw_parts($data1, $size1);
+            let $data2: &[u8] = std::slice::from_raw_parts($data2, $size2);
+            let $out: &mut [u8] = std::slice::from_raw_parts_mut($out, $max_out_size);
+
+            // `unsigned int` is generally a `u32`, but not on all targets. Do
+            // an infallible (and potentially lossy, but that's okay because it
+            // preserves determinism) conversion.
+            let $seed = $seed as u32;
+
+            // Define and invoke a new, safe function so that the body doesn't
+            // inherit `unsafe`.
+            fn custom_crossover(
+                $data1: &[u8],
+                $size1: usize,
+                $data2: &[u8],
+                $size2: usize,
+                $out: &mut [u8],
+                $max_out_size: usize,
+                $seed: u32,
+            ) -> usize {
+                $body
+            }
+
+            custom_crossover($data1, $size1, $data2, $size2, $out, $max_out_size, $seed)
+        }
+    };
+}