From dfe756f94ae9df6d551273fa71385eec713775ee Mon Sep 17 00:00:00 2001 From: David Cuthbert Date: Fri, 30 Sep 2022 21:30:58 -0700 Subject: [PATCH 1/4] Enable keep/reject inputs from the corpus This allows the fuzz target to indiciate whether an input was useful for the fuzz testing by returning Corpus::Keep or Corpus::Reject. Backwards compatibility is preserved by coercing the unit type () to Corpus::Keep. This maps to 0 (Keep) and -1 (Reject) in the libFuzzer API: https://llvm.org/docs/LibFuzzer.html#rejecting-unwanted-inputs --- src/lib.rs | 90 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 75 insertions(+), 15 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fdf3a55..a84ba18 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,10 +14,37 @@ pub use arbitrary; use once_cell::sync::OnceCell; +/// Indicates whether the input should be kept in the corpus or rejected. This +/// should be returned by your fuzz target. If your fuzz target does not return +/// a value (i.e., returns `()`), then the input will be kept in the corpus. +#[derive(Debug)] +pub enum Corpus { + /// Keep the input in the corpus. + Keep, + + /// Reject the input and do not keep it in the corpus. + Reject, +} + +impl From<()> for Corpus { + fn from(_: ()) -> Self { + Self::Keep + } +} + +impl From for i32 { + fn from(value: Corpus) -> i32 { + match value { + Corpus::Keep => 0, + Corpus::Reject => -1, + } + } +} + extern "C" { // We do not actually cross the FFI bound here. #[allow(improper_ctypes)] - fn rust_fuzzer_test_input(input: &[u8]); + fn rust_fuzzer_test_input(input: &[u8]) -> i32; fn LLVMFuzzerMutate(data: *mut u8, size: usize, max_size: usize) -> usize; } @@ -27,14 +54,17 @@ extern "C" { pub fn test_input_wrap(data: *const u8, size: usize) -> i32 { let test_input = ::std::panic::catch_unwind(|| unsafe { let data_slice = ::std::slice::from_raw_parts(data, size); - rust_fuzzer_test_input(data_slice); + rust_fuzzer_test_input(data_slice) }); - if test_input.err().is_some() { - // hopefully the custom panic hook will be called before and abort the - // process before the stack frames are unwinded. - ::std::process::abort(); + + match test_input { + Ok(i) => i, + Err(_) => { + // hopefully the custom panic hook will be called before and abort the + // process before the stack frames are unwinded. + ::std::process::abort(); + } } - 0 } #[doc(hidden)] @@ -86,6 +116,30 @@ pub fn initialize(_argc: *const isize, _argv: *const *const *const u8) -> isize /// # mod my_crate { pub fn parse(_: &[u8]) -> Result<(), ()> { unimplemented!() } } /// ``` /// +/// ## Rejecting Inputs +/// +/// To indicate whether an input should be kept in or rejected from the corpus, +/// return a [Corpus] value from your fuzz target. For example: +/// +/// ```no_run +/// #![no_main] +/// +/// use libfuzzer_sys::{Corpus, fuzz_target}; +/// +/// fuzz_target!(|input: String| -> Corpus { +/// let parts: Vec<&str> = input.splitn(2, '=').collect(); +/// if parts.len() != 2 { +/// return Corpus::Reject; +/// } +/// +/// let key = parts[0]; +/// let value = parts[1]; +/// my_crate::parse(key, value); +/// Corpus::Keep +/// ); +/// # mod my_crate { pub fn parse(_key: &str, _value: &str) -> Result<(), ()> { unimplemented!() } } +/// ``` +/// /// ## Arbitrary Input Types /// /// The input is a `&[u8]` slice by default, but you can take arbitrary input @@ -139,7 +193,7 @@ macro_rules! fuzz_target { const _: () = { /// Auto-generated function #[no_mangle] - pub extern "C" fn rust_fuzzer_test_input(bytes: &[u8]) { + pub extern "C" fn rust_fuzzer_test_input(bytes: &[u8]) -> i32 { // When `RUST_LIBFUZZER_DEBUG_PATH` is set, write the debug // formatting of the input to that file. This is only intended for // `cargo fuzz`'s use! @@ -154,7 +208,8 @@ macro_rules! fuzz_target { return; } - run(bytes) + run(bytes); + 0 } // Split out the actual fuzzer into a separate function which is @@ -181,10 +236,14 @@ macro_rules! fuzz_target { }; (|$data:ident: $dty: ty| $body:block) => { + $crate::fuzz_target!(|$data: $dty| -> () $body); + }; + + (|$data:ident: $dty: ty| -> $rty: ty $body:block) => { const _: () = { /// Auto-generated function #[no_mangle] - pub extern "C" fn rust_fuzzer_test_input(bytes: &[u8]) { + pub extern "C" fn rust_fuzzer_test_input(bytes: &[u8]) -> i32 { use $crate::arbitrary::{Arbitrary, Unstructured}; // Early exit if we don't have enough bytes for the `Arbitrary` @@ -194,7 +253,7 @@ macro_rules! fuzz_target { // get to longer inputs that actually lead to interesting executions // quicker. if bytes.len() < <$dty as Arbitrary>::size_hint(0).0 { - return; + return -1; } let mut u = Unstructured::new(bytes); @@ -214,20 +273,21 @@ macro_rules! fuzz_target { Err(err) => writeln!(&mut file, "Arbitrary Error: {}", err), }) .expect("failed to write to `RUST_LIBFUZZER_DEBUG_PATH` file"); - return; + return -1; } let data = match data { Ok(d) => d, - Err(_) => return, + Err(_) => return -1, }; - run(data) + let result: i32 = ::libfuzzer_sys::Corpus::from(run(data)).into(); + result } // See above for why this is split to a separate function. #[inline(never)] - fn run($data: $dty) { + fn run($data: $dty) -> $rty { $body } }; From 028f4e15a252eefc95e550bd5a8c2513cc5f023d Mon Sep 17 00:00:00 2001 From: David Cuthbert Date: Sun, 16 Oct 2022 19:52:52 -0700 Subject: [PATCH 2/4] Add changes suggested from code review. Docs: make it explicit that we're ignoring the return value of the function under test. Add comments from libfuzzer explaining why one might want to keep inputs out of the corpus. Convert From to i32 to a pub fn to_libfuzzer_code() that is impl on Corpus to avoid accidental conversion. --- src/lib.rs | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a84ba18..574134e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,9 +32,13 @@ impl From<()> for Corpus { } } -impl From for i32 { - fn from(value: Corpus) -> i32 { - match value { +impl Corpus { + #[doc(hidden)] + /// Convert this Corpus result into the [integer codes used by + /// `libFuzzer`](https://llvm.org/docs/LibFuzzer.html#rejecting-unwanted-inputs). + /// This is -1 for reject, 0 for keep. + pub fn to_libfuzzer_code(self) -> i32 { + match self { Corpus::Keep => 0, Corpus::Reject => -1, } @@ -118,8 +122,17 @@ pub fn initialize(_argc: *const isize, _argv: *const *const *const u8) -> isize /// /// ## Rejecting Inputs /// -/// To indicate whether an input should be kept in or rejected from the corpus, -/// return a [Corpus] value from your fuzz target. For example: +/// It may be desirable to reject some inputs, i.e. to not add them to the +/// corpus. +/// +/// For example, when fuzzing an API consisting of parsing and other logic, +/// one may want to allow only those inputs into the corpus that parse +/// successfully. To indicate whether an input should be kept in or rejected +/// from the corpus, return either [Corpus::Keep] or [Corpus::Reject] from your +/// fuzz target. The default behavior (e.g. if `()` is returned) is to keep the +/// input in the corpus. +/// +/// For example: /// /// ```no_run /// #![no_main] @@ -134,7 +147,7 @@ pub fn initialize(_argc: *const isize, _argv: *const *const *const u8) -> isize /// /// let key = parts[0]; /// let value = parts[1]; -/// my_crate::parse(key, value); +/// let _result: Result<_, _> = my_crate::parse(key, value); /// Corpus::Keep /// ); /// # mod my_crate { pub fn parse(_key: &str, _value: &str) -> Result<(), ()> { unimplemented!() } } @@ -281,8 +294,8 @@ macro_rules! fuzz_target { Err(_) => return -1, }; - let result: i32 = ::libfuzzer_sys::Corpus::from(run(data)).into(); - result + let result = ::libfuzzer_sys::Corpus::from(run(data)); + result.to_libfuzzer_code() } // See above for why this is split to a separate function. From 7ae224a4e002e872b67fc3d7775962ee8c07f3b1 Mon Sep 17 00:00:00 2001 From: David Cuthbert Date: Sun, 16 Oct 2022 20:19:36 -0700 Subject: [PATCH 3/4] Fix missing return branch. Fix doc tests. --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 574134e..bd5bc07 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -149,7 +149,7 @@ pub fn initialize(_argc: *const isize, _argv: *const *const *const u8) -> isize /// let value = parts[1]; /// let _result: Result<_, _> = my_crate::parse(key, value); /// Corpus::Keep -/// ); +/// }); /// # mod my_crate { pub fn parse(_key: &str, _value: &str) -> Result<(), ()> { unimplemented!() } } /// ``` /// @@ -218,7 +218,7 @@ macro_rules! fuzz_target { .expect("failed to create `RUST_LIBFUZZER_DEBUG_PATH` file"); writeln!(&mut file, "{:?}", bytes) .expect("failed to write to `RUST_LIBFUZZER_DEBUG_PATH` file"); - return; + return 0; } run(bytes); From 1a0499e95b563d1f30f0d07ee8c774c261f04f56 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Tue, 18 Oct 2022 11:18:40 -0700 Subject: [PATCH 4/4] Remove trailing whitespace --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index bd5bc07..ce1eb98 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -131,7 +131,7 @@ pub fn initialize(_argc: *const isize, _argv: *const *const *const u8) -> isize /// from the corpus, return either [Corpus::Keep] or [Corpus::Reject] from your /// fuzz target. The default behavior (e.g. if `()` is returned) is to keep the /// input in the corpus. -/// +/// /// For example: /// /// ```no_run