From c773bcfbb17526fe8cddb5482efbe4b73a8b9bbc Mon Sep 17 00:00:00 2001
From: "R. Elliott Childre" <elliottchildre39@gmail.com>
Date: Sat, 25 Nov 2023 23:36:18 -0500
Subject: [PATCH] Add bindings for custom crossover with example

* Expose `LLVMFuzzerCustomCrossOver` through `fuzz_crossover` macro.
* `example_crossover` uses `fuzz_mutator` & `fuzz_crossover`
---
 CHANGELOG.md                                |   3 +-
 Cargo.toml                                  |   1 +
 ci/script.sh                                |   6 +
 example_crossover/.gitignore                |   1 +
 example_crossover/Cargo.toml                |   8 ++
 example_crossover/README.md                 |  34 +++++
 example_crossover/fuzz/Cargo.toml           |  17 +++
 example_crossover/fuzz/fuzz_targets/boom.rs | 128 +++++++++++++++++
 example_crossover/src/lib.rs                |   5 +
 src/lib.rs                                  | 145 ++++++++++++++++++++
 10 files changed, 347 insertions(+), 1 deletion(-)
 create mode 100644 example_crossover/.gitignore
 create mode 100644 example_crossover/Cargo.toml
 create mode 100644 example_crossover/README.md
 create mode 100644 example_crossover/fuzz/Cargo.toml
 create mode 100755 example_crossover/fuzz/fuzz_targets/boom.rs
 create mode 100644 example_crossover/src/lib.rs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 697e71b..6c30095 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,8 @@ Released YYYY-MM-DD.
 
 ### Added
 
-* TODO (or remove section if none)
+* Bindings to `LLVMFuzzerCustomCrossOver` through the `fuzz_crossover` macro.
+* `example_crossover` using both `fuzz_mutator` and `fuzz_crossover` (adapted from @rigtorp)
 
 ### Changed
 
diff --git a/Cargo.toml b/Cargo.toml
index 9aaf813..bdcd0d9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,6 +24,7 @@ arbitrary-derive = ["arbitrary/derive"]
 members = [
   "./example/fuzz",
   "./example_arbitrary/fuzz",
+  "./example_crossover/fuzz",
   "./example_mutator/fuzz",
 ]
 
diff --git a/ci/script.sh b/ci/script.sh
index 59462d4..3eb2d17 100755
--- a/ci/script.sh
+++ b/ci/script.sh
@@ -30,4 +30,10 @@ cargo fuzz build  --dev
 (! cargo fuzz run boom -- -runs=10000000)
 popd
 
+pushd ./example_crossover
+cargo fuzz build
+cargo fuzz build  --dev
+(! cargo fuzz run --release boom -- -runs=10000000)
+popd
+
 echo "All good!"
diff --git a/example_crossover/.gitignore b/example_crossover/.gitignore
new file mode 100644
index 0000000..d8d1df6
--- /dev/null
+++ b/example_crossover/.gitignore
@@ -0,0 +1 @@
+crash-*
diff --git a/example_crossover/Cargo.toml b/example_crossover/Cargo.toml
new file mode 100644
index 0000000..441b5e3
--- /dev/null
+++ b/example_crossover/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "example_crossover"
+version = "0.1.0"
+authors = ["R. Elliott Childre"]
+edition = "2021"
+
+[target.'cfg(fuzzing)'.dependencies]
+rand = "0.8"
diff --git a/example_crossover/README.md b/example_crossover/README.md
new file mode 100644
index 0000000..ee6a4d3
--- /dev/null
+++ b/example_crossover/README.md
@@ -0,0 +1,34 @@
+# A Custom Crossover Example
+
+## Overview
+
+This example is a reimplementation of [Erik Rigtorp's floating point summation fuzzing example][1]
+in the Rust bindings for LibFuzzer, provided by this crate.  In this particular example, Erik uses
+both a custom mutator, and a custom crossover function, which provides a well-documented, complex
+code example.
+
+## Implementation
+
+This is mostly a one-to-one rewrite of the C++ code in the blog post, with the big difference
+being the method of converting the raw bytes that are exposed to the custom functions into the
+decoded double-precision floating-point values. In C++ we can simply do:
+
+```c++
+uint8_t *Data = ...;
+size_t Size = ...;
+double *begin = (double *)Data;
+double *end = (double *)Data + Size / sizeof(double);
+```
+
+In Rust, however, the task seems a bit more complex due to strictness on alignment:
+
+* [Rust, how to slice into a byte array as if it were a float array? - Stack Overflow][2]
+* [Re-interpret slice of bytes (e.g. [u8]) as slice of [f32] - help - The Rust Programming Language Forum][3]
+* [How to transmute a u8 buffer to struct in Rust? - Stack Overflow][4]
+
+So the casts of `Data` in the blog post's C++ are now `slice::align_to{_mut}` calls.
+
+[1]: https://rigtorp.se/fuzzing-floating-point-code/
+[2]: https://stackoverflow.com/a/73174764
+[3]: https://users.rust-lang.org/t/re-interpret-slice-of-bytes-e-g-u8-as-slice-of-f32/34551
+[4]: https://stackoverflow.com/a/59292352
diff --git a/example_crossover/fuzz/Cargo.toml b/example_crossover/fuzz/Cargo.toml
new file mode 100644
index 0000000..31b8783
--- /dev/null
+++ b/example_crossover/fuzz/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "example_crossover_fuzz"
+version = "0.1.0"
+authors = ["R. Elliott Childre"]
+edition = "2021"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+rand = "0.8"
+libfuzzer-sys = { path = "../.." }
+example_crossover = { path = ".." }
+
+[[bin]]
+name = "boom"
+path = "fuzz_targets/boom.rs"
diff --git a/example_crossover/fuzz/fuzz_targets/boom.rs b/example_crossover/fuzz/fuzz_targets/boom.rs
new file mode 100755
index 0000000..f34bfbf
--- /dev/null
+++ b/example_crossover/fuzz/fuzz_targets/boom.rs
@@ -0,0 +1,128 @@
+#![no_main]
+
+use example_crossover::sum;
+use libfuzzer_sys::{fuzz_crossover, fuzz_mutator, fuzz_target};
+use rand::distributions::{Bernoulli, Distribution, Uniform};
+use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng};
+use std::mem::size_of;
+
+fuzz_target!(|data: &[u8]| {
+    let (_, floats, _) = unsafe { data.align_to::<f64>() };
+
+    let res = sum(floats);
+
+    assert!(
+        !res.is_nan(),
+        "The sum of the following f64's resulted in a NaN: {floats:?}"
+    );
+});
+
+fn rfp(rng: &mut StdRng) -> f64 {
+    match Uniform::new_inclusive(0, 10).sample(rng) { // 11 equally likely "interesting" values
+        0 => f64::NAN,
+        1 => f64::MIN_POSITIVE, // smallest positive normal; `f64::MIN` is just `-f64::MAX`
+        2 => f64::MAX,
+        3 => -f64::MIN_POSITIVE,
+        4 => -f64::MAX,
+        5 => f64::EPSILON,
+        6 => -f64::EPSILON,
+        7 => f64::INFINITY,
+        8 => f64::NEG_INFINITY,
+        9 => 0.0,
+        10 => Uniform::new_inclusive(-1.0, 1.0).sample(rng),
+        _ => 0.0,
+    }
+}
+
+fuzz_mutator!(|data: &mut [u8], size: usize, max_size: usize, seed: u32| {
+    let mut gen = StdRng::seed_from_u64(seed.into());
+    // Apply one of four equally likely mutations to the `f64` view of `data`; return the new size
+    match Uniform::new_inclusive(0, 3).sample(&mut gen) {
+        0 => {
+            // "Change [an] element"
+
+            // Not altering the size, so decode the intended space (i.e. `size`) as floats
+            let (_, floats, _) = unsafe { data[..size].align_to_mut::<f64>() };
+
+            if !floats.is_empty() {
+                let d = Uniform::new(0, floats.len());
+                floats[d.sample(&mut gen)] = rfp(&mut gen);
+            }
+        }
+        1 => {
+            // "Add [an] element [to the end]"
+            let plus_one = size + size_of::<f64>();
+            if plus_one <= max_size {
+                // Growing the input by one f64, so decode the intended space (i.e.
+                // `size`) plus one more (since we just checked it will fit) as floats
+                let (_, floats, _) = unsafe { data[..plus_one].align_to_mut::<f64>() };
+                // No aligned f64 fits (unaligned, short buffer): leave the input unchanged
+                if let Some(last) = floats.last_mut() {
+                    *last = rfp(&mut gen);
+                    return plus_one;
+                }
+            }
+        }
+        2 => {
+            // "Delete [the end] element"
+
+            // Attempting to shrink the input by one f64, so decode the intended
+            // space (i.e. `size`) as floats and see if we have any
+            let (_, floats, _) = unsafe { data[..size].align_to::<f64>() };
+
+            if !floats.is_empty() {
+                return size - size_of::<f64>();
+            }
+        }
+        3 => {
+            // "Shuffle [the] elements"
+
+            // Not altering the size, so decode the intended space (i.e. `size`) as floats
+            let (_, floats, _) = unsafe { data[..size].align_to_mut::<f64>() };
+            floats.shuffle(&mut gen);
+        }
+        _ => unreachable!(),
+    };
+
+    size
+});
+
+fuzz_crossover!(|data1: &[u8], data2: &[u8], out: &mut [u8], seed: u32| {
+    let mut gen = StdRng::seed_from_u64(seed.into());
+
+    let bd = Bernoulli::new(0.5).unwrap();
+
+    // Reinterpret both sources and the destination as `f64` slices; only the
+    // properly aligned middle part of each buffer can be viewed this way.
+    //
+    // Keep the unaligned prefix of `out` around: those bytes stay in front of
+    // the floats we write, so they have to be counted in the size we report
+    // back.
+    let (out_pref, out_floats, _) = unsafe { out.align_to_mut::<f64>() };
+    let (_, d1_floats, _) = unsafe { data1.align_to::<f64>() };
+    let (_, d2_floats, _) = unsafe { data2.align_to::<f64>() };
+
+    // The sources and the destination may each hold a different number of
+    // aligned floats, so only the smallest of the three counts can be
+    // filled element-by-element.
+    let n = *[out_floats.len(), d1_floats.len(), d2_floats.len()]
+        .iter()
+        .min()
+        .unwrap();
+
+    // For each slot flip a fair coin: on success take the float from `data1`,
+    // otherwise take it from `data2`
+    for i in 0..n {
+        out_floats[i] = if bd.sample(&mut gen) {
+            d1_floats[i]
+        } else {
+            d2_floats[i]
+        };
+    }
+
+    // Report the number of meaningful bytes in `out` back to the fuzzing
+    // engine: the untouched unaligned prefix at the beginning of `out`
+    // followed by the `n` floats that we wrote into the aligned float
+    // section.
+    out_pref.len() * size_of::<u8>() + n * size_of::<f64>()
+});
diff --git a/example_crossover/src/lib.rs b/example_crossover/src/lib.rs
new file mode 100644
index 0000000..550d49f
--- /dev/null
+++ b/example_crossover/src/lib.rs
@@ -0,0 +1,5 @@
+pub fn sum(floats: &[f64]) -> f64 {
+    floats
+        .iter()
+        .fold(0.0, |a, b| if b.is_nan() { a } else { a + b })
+}
diff --git a/src/lib.rs b/src/lib.rs
index ddf0d35..1cd12ee 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -538,3 +538,148 @@ pub fn fuzzer_mutate(data: &mut [u8], size: usize, max_size: usize) -> usize {
     assert!(new_size <= data.len());
     new_size
 }
+
+/// Define a custom cross-over function to combine test cases.
+///
+/// This is optional, and libFuzzer will use its own, default cross-over strategy
+/// if this is not provided. (As of the time of writing, this default strategy
+/// takes alternating byte sequences from the two test cases to construct the
+/// new one; see `FuzzerCrossOver.cpp`.)
+///
+/// This could potentially be useful if your input is, for instance, a
+/// sequence of fixed sized, multi-byte values and the crossover could then
+/// merge discrete values rather than joining parts of a value.
+///
+/// ## Implementation Contract
+///
+/// The original, read-only inputs are given in the full slices of `data1`, and
+/// `data2` (as opposed to the, potentially, partial slice of `data` in
+/// [the `fuzz_mutator!` macro][crate::fuzz_mutator]).
+///
+/// You must place the new input merged from the two existing inputs' data
+/// into `out` and return the size of the relevant data written to that slice.
+///
+/// The determinism requirements from [the `fuzz_mutator!` macro][crate::fuzz_mutator]
+/// apply to the `seed` parameter here as well.
+///
+/// ## Example: Floating-Point Sum NaN
+///
+/// ```no_run
+/// #![no_main]
+///
+/// use libfuzzer_sys::{fuzz_crossover, fuzz_mutator, fuzz_target, fuzzer_mutate};
+/// use rand::{rngs::StdRng, Rng, SeedableRng};
+/// use std::mem::size_of;
+///
+/// fuzz_target!(|data: &[u8]| {
+///     let (_, floats, _) = unsafe { data.align_to::<f64>() };
+///
+///     let res = floats
+///         .iter()
+///         .fold(0.0, |a, b| if b.is_nan() { a } else { a + b });
+///
+///     assert!(
+///         !res.is_nan(),
+///         "The sum of the following floats resulted in a NaN: {floats:?}"
+///     );
+/// });
+///
+/// // Inject some potentially problematic values so that the example finds
+/// // the failure more quickly.
+/// fuzz_mutator!(|data: &mut [u8], size: usize, max_size: usize, seed: u32| {
+///     let mut gen = StdRng::seed_from_u64(seed.into());
+///
+///     let (_, floats, _) = unsafe { data[..size].align_to_mut::<f64>() };
+///
+///     let x = gen.gen_range(0..=1000);
+///     if x == 0 && !floats.is_empty() {
+///         floats[0] = f64::INFINITY;
+///     } else if x == 1000 && floats.len() > 1 {
+///         floats[1] = f64::NEG_INFINITY;
+///     } else {
+///         return fuzzer_mutate(data, size, max_size);
+///     }
+///
+///     size
+/// });
+///
+/// fuzz_crossover!(|data1: &[u8], data2: &[u8], out: &mut [u8], _seed: u32| {
+///     // Decode each source to see how many floats we can pull with proper
+///     // alignment, and destination as to how many will fit with proper alignment
+///     //
+///     // Keep track of the unaligned prefix to `out`, as we will need to remember
+///     // that those bytes will remain prepended to the actual floats that we
+///     // write into the out buffer.
+///     let (out_pref, out_floats, _) = unsafe { out.align_to_mut::<f64>() };
+///     let (_, d1_floats, _) = unsafe { data1.align_to::<f64>() };
+///     let (_, d2_floats, _) = unsafe { data2.align_to::<f64>() };
+///
+///     // Put into the destination, floats first from data1 then from data2, ...if
+///     // possible given the size of `out`
+///     let mut i: usize = 0;
+///     for float in d1_floats.iter().chain(d2_floats).take(out_floats.len()) {
+///         out_floats[i] = *float;
+///         i += 1;
+///     }
+///
+///     // Now that we have written the true floats, report back to the fuzzing
+///     // engine that we left the unaligned `out` prefix bytes at the beginning of
+///     // `out` and also then the floats that we wrote into the aligned float
+///     // section.
+///     out_pref.len() * size_of::<u8>() + i * size_of::<f64>()
+/// });
+/// ```
+///
+/// This example is a minimized version of [Erik Rigtorp's floating point summation fuzzing example][1].
+/// A more detailed version of this experiment can be found in the
+/// `example_crossover` directory.
+///
+/// [1]: https://rigtorp.se/fuzzing-floating-point-code/
+#[macro_export]
+macro_rules! fuzz_crossover {
+    (
+        |
+        $data1:ident : &[u8] ,
+        $data2:ident : &[u8] ,
+        $out:ident : &mut [u8] ,
+        $seed:ident : u32 $(,)*
+        |
+        $body:block
+    ) => {
+        /// Auto-generated function. Do not use; only for LibFuzzer's
+        /// consumption (it is called through the C ABI, hence `extern "C"`).
+        #[export_name = "LLVMFuzzerCustomCrossOver"]
+        #[doc(hidden)]
+        pub unsafe extern "C" fn rust_fuzzer_custom_crossover(
+            $data1: *const u8,
+            size1: usize,
+            $data2: *const u8,
+            size2: usize,
+            $out: *mut u8,
+            max_out_size: usize,
+            $seed: std::os::raw::c_uint,
+        ) -> usize {
+            let $data1: &[u8] = std::slice::from_raw_parts($data1, size1);
+            let $data2: &[u8] = std::slice::from_raw_parts($data2, size2);
+            let $out: &mut [u8] = std::slice::from_raw_parts_mut($out, max_out_size);
+
+            // `unsigned int` is generally a `u32`, but not on all targets. Do
+            // an infallible (and potentially lossy, but that's okay because it
+            // preserves determinism) conversion.
+            let $seed = $seed as u32;
+
+            // Define and invoke a new, safe function so that the body doesn't
+            // inherit `unsafe`.
+            fn custom_crossover(
+                $data1: &[u8],
+                $data2: &[u8],
+                $out: &mut [u8],
+                $seed: u32,
+            ) -> usize {
+                $body
+            }
+
+            custom_crossover($data1, $data2, $out, $seed)
+        }
+    };
+}