diff --git a/README.md b/README.md index b6cf608..ba2b015 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,11 @@ Made with CLion. Thanks to JetBrains for supporting open source! It's base64. What more could anyone want? -This library's goals are to be *correct* and *fast*. It's thoroughly tested and widely used. It exposes functionality at multiple levels of abstraction so you can choose the level of convenience vs performance that you want, e.g. `decode_engine_slice` decodes into an existing `&mut [u8]` and is pretty fast (2.6GiB/s for a 3 KiB input), whereas `decode_engine` allocates a new `Vec` and returns it, which might be more convenient in some cases, but is slower (although still fast enough for almost any purpose) at 2.1 GiB/s. +This library's goals are to be *correct* and *fast*. It's thoroughly tested and widely used. It exposes functionality at +multiple levels of abstraction so you can choose the level of convenience vs performance that you want, +e.g. `decode_engine_slice` decodes into an existing `&mut [u8]` and is pretty fast (2.6GiB/s for a 3 KiB input), +whereas `decode_engine` allocates a new `Vec` and returns it, which might be more convenient in some cases, but is +slower (although still fast enough for almost any purpose) at 2.1 GiB/s. ## Example @@ -32,7 +36,8 @@ See the [docs](https://docs.rs/base64) for all the details. Remove non-base64 characters from your input before decoding. -If you have a `Vec` of base64, [retain](https://doc.rust-lang.org/std/vec/struct.Vec.html#method.retain) can be used to strip out whatever you need removed. +If you have a `Vec` of base64, [retain](https://doc.rust-lang.org/std/vec/struct.Vec.html#method.retain) can be used to +strip out whatever you need removed. If you have a `Read` (e.g. reading a file or network socket), there are various approaches. @@ -43,15 +48,45 @@ If you have a `Read` (e.g. 
reading a file or network socket), there are various [line-wrap](https://crates.io/crates/line-wrap) does just that. +### I want canonical base64 encoding/decoding. + +First, don't do this. You should no more expect Base64 to be canonical than you should expect compression algorithms to +produce canonical output across all usage in the wild (hint: they don't). +However, [people are drawn to their own destruction like moths to a flame](https://eprint.iacr.org/2022/361), so here we +are. + +There are two opportunities for non-canonical encoding (and thus, detection of the same during decoding): the final bits +of the last encoded token in two or three token suffixes, and the `=` token used to inflate the suffix to a full four +tokens. + +The trailing bits issue is unavoidable: with 6 bits available in each encoded token, 1 input byte takes 2 tokens, +with the second one having some bits unused. Same for two input bytes: 16 bits, but 3 tokens have 18 bits. Unless we +decide to stop shipping whole bytes around, we're stuck with those extra bits that a sneaky or buggy encoder might set +to 1 instead of 0. + +The `=` pad bytes, on the other hand, are entirely a self-own by the Base64 standard. They do not affect decoding other +than to provide an opportunity to say "that padding is incorrect". Exabytes of storage and transfer have no doubt been +wasted on pointless `=` bytes. Somehow we all seem to be quite comfortable with, say, hex-encoded data just stopping +when it's done rather than requiring a confirmation that the author of the encoder could count to four. Anyway, there +are two ways to make pad bytes predictable: require canonical padding to the next multiple of four bytes as per the RFC, +or, if you control all producers and consumers, save a few bytes by requiring no padding (especially applicable to the +url-safe alphabet). 
+ +All `Engine` implementations must at a minimum support treating non-canonical padding of both types as an error, and +optionally may allow other behaviors. + ## Rust version compatibility -The minimum required Rust version is 1.57.0. +The minimum supported Rust version is 1.57.0. # Contributing -Contributions are very welcome. However, because this library is used widely, and in security-sensitive contexts, all PRs will be carefully scrutinized. Beyond that, this sort of low level library simply needs to be 100% correct. Nobody wants to chase bugs in encoding of any sort. +Contributions are very welcome. However, because this library is used widely, and in security-sensitive contexts, all +PRs will be carefully scrutinized. Beyond that, this sort of low level library simply needs to be 100% correct. Nobody +wants to chase bugs in encoding of any sort. -All this means that it takes me a fair amount of time to review each PR, so it might take quite a while to carve out the free time to give each PR the attention it deserves. I will get to everyone eventually! +All this means that it takes me a fair amount of time to review each PR, so it might take quite a while to carve out the +free time to give each PR the attention it deserves. I will get to everyone eventually! ## Developing @@ -63,13 +98,22 @@ rustup run nightly cargo bench ## no_std -This crate supports no_std. By default the crate targets std via the `std` feature. You can deactivate the `default-features` to target core instead. In that case you lose out on all the functionality revolving around `std::io`, `std::error::Error` and heap allocations. There is an additional `alloc` feature that you can activate to bring back the support for heap allocations. +This crate supports no_std. By default the crate targets std via the `std` feature. You can deactivate +the `default-features` to target `core` instead. 
In that case you lose out on all the functionality revolving +around `std::io`, `std::error::Error`, and heap allocations. There is an additional `alloc` feature that you can activate +to bring back the support for heap allocations. ## Profiling -On Linux, you can use [perf](https://perf.wiki.kernel.org/index.php/Main_Page) for profiling. Then compile the benchmarks with `rustup nightly run cargo bench --no-run`. +On Linux, you can use [perf](https://perf.wiki.kernel.org/index.php/Main_Page) for profiling. Then compile the +benchmarks with `rustup run nightly cargo bench --no-run`. -Run the benchmark binary with `perf` (shown here filtering to one particular benchmark, which will make the results easier to read). `perf` is only available to the root user on most systems as it fiddles with event counters in your CPU, so use `sudo`. We need to run the actual benchmark binary, hence the path into `target`. You can see the actual full path with `rustup run nightly cargo bench -v`; it will print out the commands it runs. If you use the exact path that `bench` outputs, make sure you get the one that's for the benchmarks, not the tests. You may also want to `cargo clean` so you have only one `benchmarks-` binary (they tend to accumulate). +Run the benchmark binary with `perf` (shown here filtering to one particular benchmark, which will make the results +easier to read). `perf` is only available to the root user on most systems as it fiddles with event counters in your +CPU, so use `sudo`. We need to run the actual benchmark binary, hence the path into `target`. You can see the actual +full path with `rustup run nightly cargo bench -v`; it will print out the commands it runs. If you use the exact path +that `bench` outputs, make sure you get the one that's for the benchmarks, not the tests. You may also want +to `cargo clean` so you have only one `benchmarks-` binary (they tend to accumulate).
```bash sudo perf record target/release/deps/benchmarks-* --bench decode_10mib_reuse @@ -81,7 +125,10 @@ Then analyze the results, again with perf: sudo perf annotate -l ``` -You'll see a bunch of interleaved rust source and assembly like this. The section with `lib.rs:327` is telling us that 4.02% of samples saw the `movzbl` aka bit shift as the active instruction. However, this percentage is not as exact as it seems due to a phenomenon called *skid*. Basically, a consequence of how fancy modern CPUs are is that this sort of instruction profiling is inherently inaccurate, especially in branch-heavy code. +You'll see a bunch of interleaved rust source and assembly like this. The section with `lib.rs:327` is telling us that +4.02% of samples saw the `movzbl` (a zero-extending byte move) as the active instruction. However, this percentage is not as exact as +it seems due to a phenomenon called *skid*. Basically, a consequence of how fancy modern CPUs are is that this sort of +instruction profiling is inherently inaccurate, especially in branch-heavy code. ```text lib.rs:322 0.70 : 10698: mov %rdi,%rax @@ -103,10 +150,10 @@ You'll see a bunch of interleaved rust source and assembly like this. The sectio 0.00 : 106ab: je 1090e ``` - ## Fuzzing -This uses [cargo-fuzz](https://github.com/rust-fuzz/cargo-fuzz). See `fuzz/fuzzers` for the available fuzzing scripts. To run, use an invocation like these: +This uses [cargo-fuzz](https://github.com/rust-fuzz/cargo-fuzz). See `fuzz/fuzzers` for the available fuzzing scripts. +To run, use an invocation like these: ```bash cargo +nightly fuzz run roundtrip @@ -115,7 +162,6 @@ cargo +nightly fuzz run roundtrip_random_config -- -max_len=10240 cargo +nightly fuzz run decode_random ``` - ## License This project is dual-licensed under MIT and Apache 2.0.
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 7d8f04f..0e04202 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,8 +1,12 @@ # 0.20.0 -## Next +## 0.20.0-beta.1 + +### Breaking changes - Update MSRV to 1.57.0 +- Decoding can now either ignore padding, require correct padding, or require no padding. The default is to require correct padding. + - The `NO_PAD` config now requires that padding be absent when decoding. ## 0.20.0-alpha.1 diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 8b74839..ec3324b 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -11,7 +11,8 @@ cargo-fuzz = true [dependencies] rand = "0.6.1" rand_pcg = "0.1.1" -ring = "0.13.5" +sha2 = "0.10.6" + [dependencies.base64] path = ".." [dependencies.libfuzzer-sys] diff --git a/fuzz/fuzzers/roundtrip_no_pad.rs b/fuzz/fuzzers/roundtrip_no_pad.rs index 23cff9e..d5ff485 100644 --- a/fuzz/fuzzers/roundtrip_no_pad.rs +++ b/fuzz/fuzzers/roundtrip_no_pad.rs @@ -1,11 +1,14 @@ #![no_main] -#[macro_use] extern crate libfuzzer_sys; +#[macro_use] +extern crate libfuzzer_sys; extern crate base64; -use base64::engine::fast_portable; +use base64::engine::{self, fast_portable}; fuzz_target!(|data: &[u8]| { - let config = fast_portable::FastPortableConfig::new().with_encode_padding(false); + let config = fast_portable::FastPortableConfig::new() + .with_encode_padding(false) + .with_decode_padding_mode(engine::DecodePaddingMode::RequireNone); let engine = fast_portable::FastPortable::from(&base64::alphabet::STANDARD, config); let encoded = base64::encode_engine(&data, &engine); diff --git a/fuzz/fuzzers/utils.rs b/fuzz/fuzzers/utils.rs index c1d7edf..0158032 100644 --- a/fuzz/fuzzers/utils.rs +++ b/fuzz/fuzzers/utils.rs @@ -1,18 +1,20 @@ extern crate rand; extern crate rand_pcg; -extern crate ring; +extern crate sha2; -use base64::{alphabet, engine::fast_portable}; +use base64::{alphabet, engine::{self, fast_portable}}; use self::rand::{Rng, SeedableRng}; use self::rand_pcg::Pcg32; -use 
self::ring::digest; +use self::sha2::Digest as _; pub fn random_engine(data: &[u8]) -> fast_portable::FastPortable { // use sha256 of data as rng seed so it's repeatable - let sha = digest::digest(&digest::SHA256, data); + let mut hasher = sha2::Sha256::new(); + hasher.update(data); + let sha = hasher.finalize(); let mut seed: [u8; 16] = [0; 16]; - seed.copy_from_slice(&sha.as_ref()[0..16]); + seed.copy_from_slice(&sha.as_slice()[0..16]); let mut rng = Pcg32::from_seed(seed); @@ -22,9 +24,16 @@ pub fn random_engine(data: &[u8]) -> fast_portable::FastPortable { alphabet::STANDARD }; + let encode_padding = rng.gen(); + let decode_padding = if encode_padding { + engine::DecodePaddingMode::RequireCanonical + } else { + engine::DecodePaddingMode::RequireNone + }; let config = fast_portable::FastPortableConfig::new() - .with_encode_padding(rng.gen()) - .with_decode_allow_trailing_bits(rng.gen()); + .with_encode_padding(encode_padding) + .with_decode_allow_trailing_bits(rng.gen()) + .with_decode_padding_mode(decode_padding); fast_portable::FastPortable::from(&alphabet, config) } diff --git a/src/decode.rs b/src/decode.rs index 04db879..f6d636b 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -13,6 +13,7 @@ use std::error; #[derive(Clone, Debug, PartialEq, Eq)] pub enum DecodeError { /// An invalid byte was found in the input. The offset and offending byte are provided. + /// Padding characters (`=`) interspersed in the encoded form will be treated as invalid bytes. InvalidByte(usize, u8), /// The length of the input is invalid. /// A typical cause of this is stray trailing whitespace or other separator bytes. @@ -22,9 +23,12 @@ pub enum DecodeError { InvalidLength, /// The last non-padding input symbol's encoded 6 bits have nonzero bits that will be discarded. /// This is indicative of corrupted or truncated Base64. 
- /// Unlike InvalidByte, which reports symbols that aren't in the alphabet, this error is for + /// Unlike `InvalidByte`, which reports symbols that aren't in the alphabet, this error is for /// symbols that are in the alphabet but represent nonsensical encodings. InvalidLastSymbol(usize, u8), + /// The nature of the padding was not as configured: absent or incorrect when it must be + /// canonical, or present when it must be absent, etc. + InvalidPadding, } impl fmt::Display for DecodeError { @@ -35,6 +39,7 @@ impl fmt::Display for DecodeError { Self::InvalidLastSymbol(index, byte) => { write!(f, "Invalid last symbol {}, offset {}.", byte, index) } + Self::InvalidPadding => write!(f, "Invalid padding"), } } } @@ -46,6 +51,7 @@ impl error::Error for DecodeError { Self::InvalidByte(_, _) => "invalid byte", Self::InvalidLength => "invalid length", Self::InvalidLastSymbol(_, _) => "invalid last symbol", + Self::InvalidPadding => "invalid padding", } } @@ -192,10 +198,12 @@ pub fn decode_engine_slice>( #[cfg(test)] mod tests { use super::*; - use crate::{encode::encode_engine_string, tests::assert_encode_sanity}; - - use crate::engine::Config; - use crate::tests::random_engine; + use crate::{ + alphabet, + encode::encode_engine_string, + engine::{fast_portable, fast_portable::FastPortable, Config}, + tests::{assert_encode_sanity, random_engine}, + }; use rand::{ distributions::{Distribution, Uniform}, Rng, SeedableRng, @@ -350,12 +358,13 @@ mod tests { #[test] fn decode_engine_estimation_works_for_various_lengths() { + let engine = FastPortable::from(&alphabet::STANDARD, fast_portable::NO_PAD); for num_prefix_quads in 0..100 { for suffix in &["AA", "AAA", "AAAA"] { let mut prefix = "AAAA".repeat(num_prefix_quads); prefix.push_str(suffix); // make sure no overflow (and thus a panic) occurs - let res = decode_engine(prefix, &DEFAULT_ENGINE); + let res = decode_engine(prefix, &engine); assert!(res.is_ok()); } } diff --git a/src/encode.rs b/src/encode.rs index 
b9c284b..575beb6 100644 --- a/src/encode.rs +++ b/src/encode.rs @@ -123,7 +123,7 @@ pub fn encode_engine_string>( /// &base64::engine::DEFAULT_ENGINE); /// /// // shorten our vec down to just what was written -/// buf.resize(bytes_written, 0); +/// buf.truncate(bytes_written); /// /// assert_eq!(s, base64::decode(&buf).unwrap().as_slice()); /// ``` diff --git a/src/engine/fast_portable/decode.rs b/src/engine/fast_portable/decode.rs index 08a0429..5b67043 100644 --- a/src/engine/fast_portable/decode.rs +++ b/src/engine/fast_portable/decode.rs @@ -1,6 +1,7 @@ -use crate::engine::fast_portable::INVALID_VALUE; -use crate::engine::DecodeEstimate; -use crate::{DecodeError, PAD_BYTE}; +use crate::{ + engine::{fast_portable::INVALID_VALUE, DecodeEstimate, DecodePaddingMode}, + DecodeError, PAD_BYTE, +}; // decode logic operates on chunks of 8 input bytes without padding const INPUT_CHUNK_LEN: usize = 8; @@ -48,12 +49,13 @@ impl DecodeEstimate for FastPortableEstimate { // inlined(always), a different slow. plain ol' inline makes the benchmarks happiest at the moment, // but this is fragile and the best setting changes with only minor code modifications. #[inline] -pub fn decode_helper( +pub(crate) fn decode_helper( input: &[u8], estimate: FastPortableEstimate, output: &mut [u8], decode_table: &[u8; 256], decode_allow_trailing_bits: bool, + padding_mode: DecodePaddingMode, ) -> Result { let remainder_len = input.len() % INPUT_CHUNK_LEN; @@ -81,7 +83,7 @@ pub fn decode_helper( // written by the fast decode loop. So, we have to ignore both these 2 bytes and the // previous chunk. 2 => INPUT_CHUNK_LEN + 2, - // If this is 3 unpadded chars, then it would actually decode to 2 bytes. However, if this + // If this is 3 un-padded chars, then it would actually decode to 2 bytes. 
However, if this // is an erroneous 2 chars + 1 pad char that would decode to 1 byte, then it should fail // with an error, not panic from going past the bounds of the output slice, so we let it // use stage 3 + 4. @@ -183,123 +185,15 @@ pub fn decode_helper( debug_assert!(input.len() - input_index > 1 || input.is_empty()); debug_assert!(input.len() - input_index <= 8); - // Stage 4 - // Finally, decode any leftovers that aren't a complete input block of 8 bytes. - // Use a u64 as a stack-resident 8 byte buffer. - let mut leftover_bits: u64 = 0; - let mut morsels_in_leftover = 0; - let mut padding_bytes = 0; - let mut first_padding_index: usize = 0; - let mut last_symbol = 0_u8; - let start_of_leftovers = input_index; - for (i, b) in input[start_of_leftovers..].iter().enumerate() { - // '=' padding - if *b == PAD_BYTE { - // There can be bad padding in a few ways: - // 1 - Padding with non-padding characters after it - // 2 - Padding after zero or one non-padding characters before it - // in the current quad. - // 3 - More than two characters of padding. If 3 or 4 padding chars - // are in the same quad, that implies it will be caught by #2. - // If it spreads from one quad to another, it will be an invalid byte - // in the first quad. - - if i % 4 < 2 { - // Check for case #2. - let bad_padding_index = start_of_leftovers - + if padding_bytes > 0 { - // If we've already seen padding, report the first padding index. - // This is to be consistent with the faster logic above: it will report an - // error on the first padding character (since it doesn't expect to see - // anything but actual encoded data). - first_padding_index - } else { - // haven't seen padding before, just use where we are now - i - }; - return Err(DecodeError::InvalidByte(bad_padding_index, *b)); - } - - if padding_bytes == 0 { - first_padding_index = i; - } - - padding_bytes += 1; - continue; - } - - // Check for case #1. 
- // To make '=' handling consistent with the main loop, don't allow - // non-suffix '=' in trailing chunk either. Report error as first - // erroneous padding. - if padding_bytes > 0 { - return Err(DecodeError::InvalidByte( - start_of_leftovers + first_padding_index, - PAD_BYTE, - )); - } - last_symbol = *b; - - // can use up to 8 * 6 = 48 bits of the u64, if last chunk has no padding. - // Pack the leftovers from left to right. - let shift = 64 - (morsels_in_leftover + 1) * 6; - let morsel = decode_table[*b as usize]; - if morsel == INVALID_VALUE { - return Err(DecodeError::InvalidByte(start_of_leftovers + i, *b)); - } - - leftover_bits |= (morsel as u64) << shift; - morsels_in_leftover += 1; - } - - // When encoding 1 trailing byte (e.g. 0xFF), 2 base64 bytes ("/w") are needed. - // / is the symbol for 63 (0x3F, bottom 6 bits all set) and w is 48 (0x30, top 2 bits - // of bottom 6 bits set). - // When decoding two symbols back to one trailing byte, any final symbol higher than - // w would still decode to the original byte because we only care about the top two - // bits in the bottom 6, but would be a non-canonical encoding. So, we calculate a - // mask based on how many bits are used for just the canonical encoding, and optionally - // error if any other bits are set. In the example of one encoded byte -> 2 symbols, - // 2 symbols can technically encode 12 bits, but the last 4 are non canonical, and - // useless since there are no more symbols to provide the necessary 4 additional bits - // to finish the second original byte. 
- - let leftover_bits_ready_to_append = match morsels_in_leftover { - 0 => 0, - 2 => 8, - 3 => 16, - 4 => 24, - 6 => 32, - 7 => 40, - 8 => 48, - _ => unreachable!( - "Impossible: must only have 0 to 8 input bytes in last chunk, with no invalid lengths" - ), - }; - - // if there are bits set outside the bits we care about, last symbol encodes trailing bits that - // will not be included in the output - let mask = !0 >> leftover_bits_ready_to_append; - if !decode_allow_trailing_bits && (leftover_bits & mask) != 0 { - // last morsel is at `morsels_in_leftover` - 1 - return Err(DecodeError::InvalidLastSymbol( - start_of_leftovers + morsels_in_leftover - 1, - last_symbol, - )); - } - - // TODO benchmark simply converting to big endian bytes - let mut leftover_bits_appended_to_buf = 0; - while leftover_bits_appended_to_buf < leftover_bits_ready_to_append { - // `as` simply truncates the higher bits, which is what we want here - let selected_bits = (leftover_bits >> (56 - leftover_bits_appended_to_buf)) as u8; - output[output_index] = selected_bits; - output_index += 1; - - leftover_bits_appended_to_buf += 8; - } - - Ok(output_index) + super::decode_suffix::decode_suffix( + input, + input_index, + output, + output_index, + decode_table, + decode_allow_trailing_bits, + padding_mode, + ) } /// Decode 8 bytes of input into 6 bytes of output. 8 bytes of output will be written, but only the diff --git a/src/engine/fast_portable/decode_suffix.rs b/src/engine/fast_portable/decode_suffix.rs new file mode 100644 index 0000000..8f896b2 --- /dev/null +++ b/src/engine/fast_portable/decode_suffix.rs @@ -0,0 +1,161 @@ +use crate::{ + engine::{fast_portable::INVALID_VALUE, DecodePaddingMode}, + DecodeError, PAD_BYTE, +}; + +/// Decode the last 1-8 bytes, checking for trailing set bits and padding per the provided +/// parameters. +/// +/// Returns the total number of bytes decoded, including the ones indicated as already written by +/// `output_index`. 
+pub(crate) fn decode_suffix( + input: &[u8], + input_index: usize, + output: &mut [u8], + mut output_index: usize, + decode_table: &[u8; 256], + decode_allow_trailing_bits: bool, + padding_mode: DecodePaddingMode, +) -> Result { + // Decode any leftovers that aren't a complete input block of 8 bytes. + // Use a u64 as a stack-resident 8 byte buffer. + let mut leftover_bits: u64 = 0; + let mut morsels_in_leftover = 0; + let mut padding_bytes = 0; + let mut first_padding_index: usize = 0; + let mut last_symbol = 0_u8; + let start_of_leftovers = input_index; + + for (i, &b) in input[start_of_leftovers..].iter().enumerate() { + // '=' padding + if b == PAD_BYTE { + // There can be bad padding bytes in a few ways: + // 1 - Padding with non-padding characters after it + // 2 - Padding after zero or one characters in the current quad (should only + // be after 2 or 3 chars) + // 3 - More than two characters of padding. If 3 or 4 padding chars + // are in the same quad, that implies it will be caught by #2. + // If it spreads from one quad to another, it will be an invalid byte + // in the first quad. + // 4 - Non-canonical padding -- 1 byte when it should be 2, etc. + // Per config, non-canonical but still functional non- or partially-padded base64 + // may be treated as an error condition. + + if i % 4 < 2 { + // Check for case #2. + let bad_padding_index = start_of_leftovers + + if padding_bytes > 0 { + // If we've already seen padding, report the first padding index. + // This is to be consistent with the normal decode logic: it will report an + // error on the first padding character (since it doesn't expect to see + // anything but actual encoded data). + // This could only happen if the padding started in the previous quad since + // otherwise this case would have been hit at i % 4 == 0 if it was the same + // quad. 
+ first_padding_index + } else { + // haven't seen padding before, just use where we are now + i + }; + return Err(DecodeError::InvalidByte(bad_padding_index, b)); + } + + if padding_bytes == 0 { + first_padding_index = i; + } + + padding_bytes += 1; + continue; + } + + // Check for case #1. + // To make '=' handling consistent with the main loop, don't allow + // non-suffix '=' in trailing chunk either. Report error as first + // erroneous padding. + if padding_bytes > 0 { + return Err(DecodeError::InvalidByte( + start_of_leftovers + first_padding_index, + PAD_BYTE, + )); + } + + last_symbol = b; + + // can use up to 8 * 6 = 48 bits of the u64, if last chunk has no padding. + // Pack the leftovers from left to right. + let shift = 64 - (morsels_in_leftover + 1) * 6; + let morsel = decode_table[b as usize]; + if morsel == INVALID_VALUE { + return Err(DecodeError::InvalidByte(start_of_leftovers + i, b)); + } + + leftover_bits |= (morsel as u64) << shift; + morsels_in_leftover += 1; + } + + match padding_mode { + DecodePaddingMode::Indifferent => { /* everything we care about was already checked */ } + DecodePaddingMode::RequireCanonical => { + if (padding_bytes + morsels_in_leftover) % 4 != 0 { + return Err(DecodeError::InvalidPadding); + } + } + DecodePaddingMode::RequireNone => { + if padding_bytes > 0 { + // check at the end to make sure we let the cases of padding that should be InvalidByte + // get hit + return Err(DecodeError::InvalidPadding); + } + } + } + + // When encoding 1 trailing byte (e.g. 0xFF), 2 base64 bytes ("/w") are needed. + // / is the symbol for 63 (0x3F, bottom 6 bits all set) and w is 48 (0x30, top 2 bits + // of bottom 6 bits set). + // When decoding two symbols back to one trailing byte, any final symbol higher than + // w would still decode to the original byte because we only care about the top two + // bits in the bottom 6, but would be a non-canonical encoding. 
So, we calculate a + // mask based on how many bits are used for just the canonical encoding, and optionally + // error if any other bits are set. In the example of one encoded byte -> 2 symbols, + // 2 symbols can technically encode 12 bits, but the last 4 are non canonical, and + // useless since there are no more symbols to provide the necessary 4 additional bits + // to finish the second original byte. + + let leftover_bits_ready_to_append = match morsels_in_leftover { + 0 => 0, + 2 => 8, + 3 => 16, + 4 => 24, + 6 => 32, + 7 => 40, + 8 => 48, + // can also be detected as case #2 bad padding above + _ => unreachable!( + "Impossible: must only have 0 to 8 input bytes in last chunk, with no invalid lengths" + ), + }; + + // if there are bits set outside the bits we care about, last symbol encodes trailing bits that + // will not be included in the output + let mask = !0 >> leftover_bits_ready_to_append; + if !decode_allow_trailing_bits && (leftover_bits & mask) != 0 { + // last morsel is at `morsels_in_leftover` - 1 + return Err(DecodeError::InvalidLastSymbol( + start_of_leftovers + morsels_in_leftover - 1, + last_symbol, + )); + } + + // TODO benchmark simply converting to big endian bytes + let mut leftover_bits_appended_to_buf = 0; + while leftover_bits_appended_to_buf < leftover_bits_ready_to_append { + // `as` simply truncates the higher bits, which is what we want here + let selected_bits = (leftover_bits >> (56 - leftover_bits_appended_to_buf)) as u8; + output[output_index] = selected_bits; + output_index += 1; + + leftover_bits_appended_to_buf += 8; + } + + Ok(output_index) +} diff --git a/src/engine/fast_portable/mod.rs b/src/engine/fast_portable/mod.rs index de39eb5..9eef9b1 100644 --- a/src/engine/fast_portable/mod.rs +++ b/src/engine/fast_portable/mod.rs @@ -1,10 +1,13 @@ //! Provides the [FastPortable] engine and associated config types. 
-use crate::alphabet::Alphabet; -use crate::engine::Config; -use crate::DecodeError; +use crate::{ + alphabet::Alphabet, + engine::{Config, DecodePaddingMode}, + DecodeError, +}; use core::convert::TryInto; mod decode; +pub(crate) mod decode_suffix; pub use decode::FastPortableEstimate; pub(crate) const INVALID_VALUE: u8 = 255; @@ -173,6 +176,7 @@ impl super::Engine for FastPortable { output, &self.decode_table, self.config.decode_allow_trailing_bits, + self.config.decode_padding_mode, ) } @@ -237,10 +241,12 @@ fn read_u64(s: &[u8]) -> u64 { pub struct FastPortableConfig { encode_padding: bool, decode_allow_trailing_bits: bool, + decode_padding_mode: DecodePaddingMode, } impl FastPortableConfig { - /// Create a new config with `padding` = `true` and `decode_allow_trailing_bits` = `false`. + /// Create a new config with `padding` = `true`, `decode_allow_trailing_bits` = `false`, and + /// `decode_padding_mode` = `DecodePaddingMode::RequireCanonical`. /// /// This probably matches most people's expectations, but consider disabling padding to save /// a few bytes unless you specifically need it for compatibility with some legacy system. @@ -249,13 +255,14 @@ impl FastPortableConfig { // RFC states that padding must be applied by default encode_padding: true, decode_allow_trailing_bits: false, + decode_padding_mode: DecodePaddingMode::RequireCanonical, } } - /// Create a new config based on `self` with an updated `padding` parameter. + /// Create a new config based on `self` with an updated `padding` setting. /// - /// If `true`, encoding will append either 1 or 2 `=` padding characters to produce an - /// output whose length is a multiple of 4. + /// If `padding` is `true`, encoding will append either 1 or 2 `=` padding characters as needed + /// to produce an output whose length is a multiple of 4.
/// /// Padding is not needed for correct decoding and only serves to waste bytes, but it's in the /// [spec](https://datatracker.ietf.org/doc/html/rfc4648#section-3.2). @@ -269,7 +276,7 @@ impl FastPortableConfig { } } - /// Create a new config based on `self` with an updated `decode_allow_trailing_bits` parameter. + /// Create a new config based on `self` with an updated `decode_allow_trailing_bits` setting. /// /// Most users will not need to configure this. It's useful if you need to decode base64 /// produced by a buggy encoder that has bits set in the unused space on the last base64 @@ -282,6 +289,26 @@ impl FastPortableConfig { ..self } } + + /// Create a new config based on `self` with an updated `decode_padding_mode` setting. + /// + /// Padding is not useful in terms of representing encoded data -- it makes no difference to + /// the decoder if padding is present or not, so if you have some un-padded input to decode, it + /// is perfectly fine to use `DecodePaddingMode::Indifferent` to prevent errors from being + /// emitted. + /// + /// However, since in practice + /// [people who learned nothing from BER vs DER seem to expect base64 to have one canonical encoding](https://eprint.iacr.org/2022/361), + /// the default setting is the stricter `DecodePaddingMode::RequireCanonical`. + /// + /// Or, if "canonical" in your circumstance means _no_ padding rather than padding to the + /// next multiple of four, there's `DecodePaddingMode::RequireNone`. + pub const fn with_decode_padding_mode(self, mode: DecodePaddingMode) -> Self { + Self { + decode_padding_mode: mode, + ..self + } + } } impl Default for FastPortableConfig { @@ -297,11 +324,13 @@ impl Config for FastPortableConfig { } } -/// Include padding bytes when encoding. +/// Include padding bytes when encoding, and require that they be present when decoding. /// /// This is the standard per the base64 RFC, but consider using [NO_PAD] instead as padding serves /// little purpose in practice. 
pub const PAD: FastPortableConfig = FastPortableConfig::new(); -/// Don't add padding when encoding. -pub const NO_PAD: FastPortableConfig = FastPortableConfig::new().with_encode_padding(false); +/// Don't add padding when encoding, and require no padding when decoding. +pub const NO_PAD: FastPortableConfig = FastPortableConfig::new() + .with_encode_padding(false) + .with_decode_padding_mode(DecodePaddingMode::RequireNone); diff --git a/src/engine/mod.rs b/src/engine/mod.rs index 764653f..ad26bb3 100644 --- a/src/engine/mod.rs +++ b/src/engine/mod.rs @@ -59,6 +59,9 @@ pub trait Engine: Send + Sync { /// [RFC](https://tools.ietf.org/html/rfc4648#section-3.5). /// /// Decoding must not write any bytes into the output slice other than the decoded data. + /// + /// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as + /// errors unless the engine is configured otherwise. fn decode( &self, input: &[u8], @@ -97,3 +100,17 @@ pub trait DecodeEstimate { /// A [FastPortable] engine using the [crate::alphabet::STANDARD] base64 alphabet and [crate::engine::fast_portable::PAD] config. pub const DEFAULT_ENGINE: FastPortable = FastPortable::from(&alphabet::STANDARD, fast_portable::PAD); + +/// Controls how pad bytes are handled when decoding. +/// +/// Each [Engine] must support at least the behavior indicated by +/// [DecodePaddingMode::RequireCanonical], and may support other modes. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum DecodePaddingMode { + /// Canonical padding is allowed, but any fewer padding bytes than that is also allowed. + Indifferent, + /// Padding must be canonical (0, 1, or 2 `=` as needed to produce a 4 byte suffix). + RequireCanonical, + /// Padding must be absent -- for when you want predictable padding, without any wasted bytes. 
+ RequireNone, +} diff --git a/src/engine/naive.rs b/src/engine/naive.rs index a873606..138b821 100644 --- a/src/engine/naive.rs +++ b/src/engine/naive.rs @@ -1,7 +1,11 @@ -use crate::alphabet::Alphabet; -use crate::engine::fast_portable::{decode_table, encode_table}; -use crate::engine::{fast_portable, Config, DecodeEstimate, Engine}; -use crate::{DecodeError, PAD_BYTE}; +use crate::{ + alphabet::Alphabet, + engine::{ + fast_portable::{self, decode_table, encode_table}, + Config, DecodeEstimate, DecodePaddingMode, Engine, + }, + DecodeError, PAD_BYTE, +}; use alloc::ops::BitOr; use std::ops::{BitAnd, Shl, Shr}; @@ -159,105 +163,15 @@ impl Engine for Naive { } } - // handle incomplete chunk -- simplified version of FastPortable - let mut leftover_bits: u32 = 0; - let mut morsels_in_leftover = 0; - let mut padding_bytes = 0; - let mut first_padding_index: usize = 0; - let mut last_symbol = 0_u8; - let start_of_leftovers = input_index; - for (index, byte) in input[start_of_leftovers..].iter().enumerate() { - // '=' padding - if *byte == PAD_BYTE { - // There can be bad padding in a few ways: - // 1 - Padding with non-padding characters after it - // 2 - Padding after zero or one non-padding characters before it - // in the current quad. - // 3 - More than two characters of padding. If 3 or 4 padding chars - // are in the same quad, that implies it will be caught by #2. - // If it spreads from one quad to another, it will be an invalid byte - // in the first quad. - - if index < 2 { - // Check for case #2. - let bad_padding_index = start_of_leftovers - + if padding_bytes > 0 { - // If we've already seen padding, report the first padding index. - // This is to be consistent with the faster logic above: it will report an - // error on the first padding character (since it doesn't expect to see - // anything but actual encoded data). 
- first_padding_index - } else { - // haven't seen padding before, just use where we are now - index - }; - return Err(DecodeError::InvalidByte(bad_padding_index, *byte)); - } - - if padding_bytes == 0 { - first_padding_index = index; - } - - padding_bytes += 1; - continue; - } - - // Check for case #1. - // To make '=' handling consistent with the main loop, don't allow - // non-suffix '=' in trailing chunk either. Report error as first - // erroneous padding. - if padding_bytes > 0 { - return Err(DecodeError::InvalidByte( - start_of_leftovers + first_padding_index, - PAD_BYTE, - )); - } - last_symbol = *byte; - - // can use up to 4 * 6 = 24 bits of the u32, if last chunk has no padding. - // Pack the leftovers from left to right. - let shift = 32 - (morsels_in_leftover + 1) * 6; - let morsel = self.decode_table[*byte as usize]; - if morsel == fast_portable::INVALID_VALUE { - return Err(DecodeError::InvalidByte(start_of_leftovers + index, *byte)); - } - - leftover_bits |= (morsel as u32) << shift; - morsels_in_leftover += 1; - } - - let leftover_bits_ready_to_append = match morsels_in_leftover { - 0 => 0, - 2 => 8, - 3 => 16, - 4 => 24, - _ => unreachable!( - "Impossible: must only have 0 to 4 input bytes in last chunk, with no invalid lengths" - ), - }; - - // if there are bits set outside the bits we care about, last symbol encodes trailing - // bits that will not be included in the output - let mask = !0 >> leftover_bits_ready_to_append; - if !self.config.decode_allow_trailing_bits && (leftover_bits & mask) != 0 { - // last morsel is at `morsels_in_leftover` - 1 - return Err(DecodeError::InvalidLastSymbol( - start_of_leftovers + morsels_in_leftover - 1, - last_symbol, - )); - } - - let mut leftover_bits_appended_to_buf = 0; - while leftover_bits_appended_to_buf < leftover_bits_ready_to_append { - // `as` simply truncates the higher bits, which is what we want here - let selected_bits = (leftover_bits >> (24 - leftover_bits_appended_to_buf)) as u8; - 
output[output_index] = selected_bits; - output_index += 1; - - leftover_bits_appended_to_buf += 8; - } - - Ok(output_index) + fast_portable::decode_suffix::decode_suffix( + input, + input_index, + output, + output_index, + &self.decode_table, + self.config.decode_allow_trailing_bits, + self.config.decode_padding_mode, + ) } fn config(&self) -> &Self::Config { @@ -292,12 +206,13 @@ impl DecodeEstimate for NaiveEstimate { #[derive(Clone, Copy, Debug)] pub struct NaiveConfig { - pub padding: bool, + pub encode_padding: bool, pub decode_allow_trailing_bits: bool, + pub decode_padding_mode: DecodePaddingMode, } impl Config for NaiveConfig { fn encode_padding(&self) -> bool { - self.padding + self.encode_padding } } diff --git a/src/engine/tests.rs b/src/engine/tests.rs index 8dbca93..6f9140d 100644 --- a/src/engine/tests.rs +++ b/src/engine/tests.rs @@ -1,16 +1,18 @@ // rstest_reuse template functions have unused variables #![allow(unused_variables)] -use rand::{self, distributions::Distribution, distributions::Uniform, Rng, SeedableRng}; +use rand::{ + self, distributions, distributions::Distribution as _, rngs, Rng as _, SeedableRng as _, +}; use rstest::rstest; use rstest_reuse::{apply, template}; +use std::{collections, fmt}; -use crate::tests::assert_encode_sanity; use crate::{ alphabet::{Alphabet, STANDARD}, - decode_engine, encode, - engine::{fast_portable, naive, Engine}, - tests::random_alphabet, + decode_engine, encode, encode_engine_slice, + engine::{fast_portable, naive, Config, DecodePaddingMode, Engine}, + tests::{assert_encode_sanity, random_alphabet, random_config}, DecodeError, PAD_BYTE, }; @@ -26,31 +28,32 @@ fn all_engines(engine_wrapper: E) {} fn rfc_test_vectors_std_alphabet(engine_wrapper: E) { let data = vec![ ("", ""), - ("f", "Zg"), - ("fo", "Zm8"), + ("f", "Zg=="), + ("fo", "Zm8="), ("foo", "Zm9v"), - ("foob", "Zm9vYg"), - ("fooba", "Zm9vYmE"), + ("foob", "Zm9vYg=="), + ("fooba", "Zm9vYmE="), ("foobar", "Zm9vYmFy"), ]; let engine = 
E::standard(); + let engine_no_padding = E::standard_unpadded(); for (orig, encoded) in &data { - let mut encode_buf = [0_u8; 8]; - let mut decode_buf = [0_u8; 6]; - - let encode_len = engine.encode(orig.as_bytes(), &mut encode_buf[..]); - - assert_eq!( - encoded, - &std::str::from_utf8(&encode_buf[0..encode_len]).unwrap() - ); + let encoded_without_padding = encoded.trim_end_matches('='); // unpadded { - let decode_len = engine - .decode_ez(&encode_buf[0..encode_len], &mut decode_buf[..]) + let mut encode_buf = [0_u8; 8]; + let mut decode_buf = [0_u8; 6]; + + let encode_len = engine_no_padding.encode(orig.as_bytes(), &mut encode_buf[..]); + assert_eq!( + &encoded_without_padding, + &std::str::from_utf8(&encode_buf[0..encode_len]).unwrap() + ); + let decode_len = engine_no_padding + .decode_ez(encoded_without_padding.as_bytes(), &mut decode_buf[..]) .unwrap(); assert_eq!(orig.len(), decode_len); @@ -58,14 +61,32 @@ fn rfc_test_vectors_std_alphabet(engine_wrapper: E) { orig, &std::str::from_utf8(&decode_buf[0..decode_len]).unwrap() ); + + // if there was any padding originally, the no padding engine won't decode it + if encoded.as_bytes().contains(&PAD_BYTE) { + assert_eq!( + Err(DecodeError::InvalidPadding), + engine_no_padding.decode_ez_str_vec(encoded) + ) + } } // padded { - let _ = encode::add_padding(orig.len(), &mut encode_buf[encode_len..]); + let mut encode_buf = [0_u8; 8]; + let mut decode_buf = [0_u8; 6]; + + let encode_len = engine.encode(orig.as_bytes(), &mut encode_buf[..]); + assert_eq!( + // doesn't have padding added yet + &encoded_without_padding, + &std::str::from_utf8(&encode_buf[0..encode_len]).unwrap() + ); + let pad_len = encode::add_padding(orig.len(), &mut encode_buf[encode_len..]); + assert_eq!(encoded.as_bytes(), &encode_buf[..encode_len + pad_len]); let decode_len = engine - .decode_ez(&encode_buf[0..encode_len], &mut decode_buf[..]) + .decode_ez(encoded.as_bytes(), &mut decode_buf[..]) .unwrap(); assert_eq!(orig.len(), decode_len); @@ 
-73,19 +94,27 @@ fn rfc_test_vectors_std_alphabet(engine_wrapper: E) { orig, &std::str::from_utf8(&decode_buf[0..decode_len]).unwrap() ); + + // if there was (canonical) padding, and we remove it, the standard engine won't decode + if encoded.as_bytes().contains(&PAD_BYTE) { + assert_eq!( + Err(DecodeError::InvalidPadding), + engine.decode_ez_str_vec(encoded_without_padding) + ) + } } } } #[apply(all_engines)] fn roundtrip_random(engine_wrapper: E) { - let mut rng = rand::rngs::SmallRng::from_entropy(); + let mut rng = seeded_rng(); let mut orig_data = Vec::::new(); let mut encode_buf = Vec::::new(); let mut decode_buf = Vec::::new(); - let len_range = Uniform::new(1, 1_000); + let len_range = distributions::Uniform::new(1, 1_000); for _ in 0..10_000 { let engine = E::random(&mut rng); @@ -116,18 +145,17 @@ fn roundtrip_random(engine_wrapper: E) { #[apply(all_engines)] fn encode_doesnt_write_extra_bytes(engine_wrapper: E) { - let mut rng = rand::rngs::SmallRng::from_entropy(); + let mut rng = seeded_rng(); let mut orig_data = Vec::::new(); let mut encode_buf = Vec::::new(); let mut encode_buf_backup = Vec::::new(); - let input_len_range = Uniform::new(0, 5); - let prefix_len_range = Uniform::new(0, 5); - let suffix_len_range = Uniform::new(0, 5); + let input_len_range = distributions::Uniform::new(0, 1000); for _ in 0..10_000 { let engine = E::random(&mut rng); + let padded = engine.config().encode_padding(); orig_data.clear(); encode_buf.clear(); @@ -135,16 +163,13 @@ fn encode_doesnt_write_extra_bytes(engine_wrapper: E) { let orig_len = fill_rand(&mut orig_data, &mut rng, &input_len_range); - // write a random prefix - let prefix_len = fill_rand(&mut encode_buf, &mut rng, &prefix_len_range); - let expected_encode_len_no_pad = engine_encoded_len(orig_len); - // leave space for encoded data - encode_buf.resize(expected_encode_len_no_pad + prefix_len, 0); - // and a random suffix - let suffix_len = fill_rand(&mut encode_buf, &mut rng, &suffix_len_range); - + let 
prefix_len = 1024; + // plenty of prefix and suffix + fill_rand_len(&mut encode_buf, &mut rng, prefix_len * 2 + orig_len * 2); encode_buf_backup.extend_from_slice(&encode_buf[..]); + let expected_encode_len_no_pad = encode::encoded_len(orig_len, false).unwrap(); + let encoded_len_no_pad = engine.encode(&orig_data[..], &mut encode_buf[prefix_len..]); assert_eq!(expected_encode_len_no_pad, encoded_len_no_pad); @@ -163,20 +188,38 @@ fn encode_doesnt_write_extra_bytes(engine_wrapper: E) { orig_len, ); - assert_eq!(orig_data, decode_engine(encoded_data, &engine).unwrap()); + // pad so we can decode it in case our random engine requires padding + let pad_len = if padded { + encode::add_padding(orig_len, &mut encode_buf[prefix_len + encoded_len_no_pad..]) + } else { + 0 + }; + + assert_eq!( + orig_data, + decode_engine( + &encode_buf[prefix_len..(prefix_len + encoded_len_no_pad + pad_len)], + &engine, + ) + .unwrap() + ); } } #[apply(all_engines)] -fn decode_doesnt_write_extra_bytes(engine_wrapper: E) { - let mut rng = rand::rngs::SmallRng::from_entropy(); +fn decode_doesnt_write_extra_bytes(engine_wrapper: E) +where + E: EngineWrapper, + <::Engine as Engine>::Config: fmt::Debug, +{ + let mut rng = seeded_rng(); let mut orig_data = Vec::::new(); let mut encode_buf = Vec::::new(); let mut decode_buf = Vec::::new(); let mut decode_buf_backup = Vec::::new(); - let len_range = Uniform::new(1, 1_000); + let len_range = distributions::Uniform::new(1, 1_000); for _ in 0..10_000 { let engine = E::random(&mut rng); @@ -187,105 +230,70 @@ fn decode_doesnt_write_extra_bytes(engine_wrapper: E) { decode_buf_backup.clear(); let orig_len = fill_rand(&mut orig_data, &mut rng, &len_range); - encode_buf.resize(engine_encoded_len(orig_len), 0); + encode_buf.resize(orig_len * 2 + 100, 0); - let encoded_len = engine.encode(&orig_data[..], &mut encode_buf[..]); + let encoded_len = encode_engine_slice(&orig_data[..], &mut encode_buf[..], &engine); + encode_buf.truncate(encoded_len); // oversize 
decode buffer so we can easily tell if it writes anything more than // just the decoded data - fill_rand_len(&mut decode_buf, &mut rng, (orig_len + 100) * 2); + let prefix_len = 1024; + // plenty of prefix and suffix + fill_rand_len(&mut decode_buf, &mut rng, prefix_len * 2 + orig_len * 2); decode_buf_backup.extend_from_slice(&decode_buf[..]); let dec_len = engine - .decode_ez(&encode_buf[0..encoded_len], &mut decode_buf[..]) + .decode_ez(&encode_buf, &mut decode_buf[prefix_len..]) .unwrap(); assert_eq!(orig_len, dec_len); - assert_eq!(&orig_data[..], &decode_buf[..dec_len]); - assert_eq!(&decode_buf_backup[dec_len..], &decode_buf[dec_len..]); - } -} - -#[apply(all_engines)] -fn decode_detect_invalid_last_symbol_one_byte(engine_wrapper: E) { - // 0xFF -> "/w==", so all letters > w, 0-9, and '+', '/' should get InvalidLastSymbol - let engine = E::standard(); - - assert_eq!(Ok(vec![0xFF]), engine.decode_ez_str_vec("/w==")); - assert_eq!( - Err(DecodeError::InvalidLastSymbol(1, b'x')), - engine.decode_ez_str_vec("/x==") - ); - assert_eq!( - Err(DecodeError::InvalidLastSymbol(1, b'z')), - engine.decode_ez_str_vec("/z==") - ); - assert_eq!( - Err(DecodeError::InvalidLastSymbol(1, b'0')), - engine.decode_ez_str_vec("/0==") - ); - assert_eq!( - Err(DecodeError::InvalidLastSymbol(1, b'9')), - engine.decode_ez_str_vec("/9==") - ); - assert_eq!( - Err(DecodeError::InvalidLastSymbol(1, b'+')), - engine.decode_ez_str_vec("/+==") - ); - assert_eq!( - Err(DecodeError::InvalidLastSymbol(1, b'/')), - engine.decode_ez_str_vec("//==") - ); - - // also works when it's not the only chunk - let mut prefix = String::new(); - for _ in 0..50 { - prefix.push_str("AAAA"); - - let mut input = prefix.clone(); - input.push_str("/x=="); - assert_eq!( - Err(DecodeError::InvalidLastSymbol(prefix.len() + 1, b'x')), - engine.decode_ez_str_vec(input.as_str()) + &orig_data[..], + &decode_buf[prefix_len..prefix_len + dec_len] + ); + assert_eq!(&decode_buf_backup[..prefix_len], 
&decode_buf[..prefix_len]); + assert_eq!( + &decode_buf_backup[prefix_len + dec_len..], + &decode_buf[prefix_len + dec_len..] ); } } #[apply(all_engines)] -fn decode_detect_invalid_last_symbol_two_bytes(engine_wrapper: E) { +fn decode_detect_invalid_last_symbol(engine_wrapper: E) { + // 0xFF -> "/w==", so all letters > w, 0-9, and '+', '/' should get InvalidLastSymbol let engine = E::standard(); - // example from https://github.com/marshallpierce/rust-base64/issues/75 - assert!(engine.decode_ez_str_vec("iYU=").is_ok()); - // trailing 01 - assert_eq!( - Err(DecodeError::InvalidLastSymbol(2, b'V')), - engine.decode_ez_str_vec("iYV=") - ); - // trailing 10 - assert_eq!( - Err(DecodeError::InvalidLastSymbol(2, b'W')), - engine.decode_ez_str_vec("iYW=") - ); - // trailing 11 - assert_eq!( - Err(DecodeError::InvalidLastSymbol(2, b'X')), - engine.decode_ez_str_vec("iYX=") - ); - - // also works when it's not the only chunk - let mut prefix = String::new(); - for _ in 0..50 { - prefix.push_str("AAAA"); + assert_eq!(Ok(vec![0x89, 0x85]), engine.decode_ez_str_vec("iYU=")); + assert_eq!(Ok(vec![0xFF]), engine.decode_ez_str_vec("/w==")); - let mut input = prefix.clone(); - input.push_str("iYX="); + for (suffix, offset) in vec![ + // suffix, offset of bad byte from start of suffix + ("/x==", 1_usize), + ("/z==", 1_usize), + ("/0==", 1_usize), + ("/9==", 1_usize), + ("/+==", 1_usize), + ("//==", 1_usize), + // trailing 01 + ("iYV=", 2_usize), + // trailing 10 + ("iYW=", 2_usize), + // trailing 11 + ("iYX=", 2_usize), + ] { + for prefix_quads in 0..256 { + let mut encoded = "AAAA".repeat(prefix_quads); + encoded.push_str(suffix); - assert_eq!( - Err(DecodeError::InvalidLastSymbol(prefix.len() + 2, b'X')), - engine.decode_ez_str_vec(input.as_str()) - ); + assert_eq!( + Err(DecodeError::InvalidLastSymbol( + encoded.len() - 4 + offset, + suffix.as_bytes()[offset], + )), + engine.decode_ez_str_vec(encoded.as_str()) + ); + } } } @@ -293,15 +301,11 @@ fn 
decode_detect_invalid_last_symbol_two_bytes(engine_wrapper: fn decode_detect_invalid_last_symbol_when_length_is_also_invalid( engine_wrapper: E, ) { - let mut rng = rand::rngs::SmallRng::from_entropy(); + let mut rng = seeded_rng(); // check across enough lengths that it would likely cover any implementation's various internal // small/large input division - for len in 0_usize..1000 { - if len % 4 != 1 { - continue; - } - + for len in (0_usize..256).map(|len| len * 4 + 1) { let engine = E::random_alphabet(&mut rng, &STANDARD); let mut input = vec![b'A'; len]; @@ -312,9 +316,9 @@ fn decode_detect_invalid_last_symbol_when_length_is_also_invalid(engine_wrapper: E) { let strict = E::standard(); - let forgiving = E::standard_forgiving(); + let forgiving = E::standard_allow_trailing_bits(); fn assert_tolerant_decode( engine: &E, @@ -464,7 +468,7 @@ fn decode_invalid_trailing_bits_ignored_when_configured(engine } let mut prefix = String::new(); - for _ in 0..50 { + for _ in 0..256 { let mut input = prefix.clone(); // example from https://github.com/marshallpierce/rust-base64/issues/75 @@ -490,13 +494,13 @@ fn decode_invalid_trailing_bits_ignored_when_configured(engine #[apply(all_engines)] fn decode_invalid_byte_error(engine_wrapper: E) { - let mut rng = rand::rngs::SmallRng::from_entropy(); + let mut rng = seeded_rng(); let mut orig_data = Vec::::new(); let mut encode_buf = Vec::::new(); let mut decode_buf = Vec::::new(); - let len_range = Uniform::new(1, 1_000); + let len_range = distributions::Uniform::new(1, 1_000); for _ in 0..10_000 { let alphabet = random_alphabet(&mut rng); @@ -529,7 +533,7 @@ fn decode_invalid_byte_error(engine_wrapper: E) { } }; - let invalid_range = Uniform::new(0, orig_len); + let invalid_range = distributions::Uniform::new(0, orig_len); let invalid_index = invalid_range.sample(&mut rng); encode_buf[invalid_index] = invalid_byte; @@ -543,52 +547,277 @@ fn decode_invalid_byte_error(engine_wrapper: E) { } } +/// Any amount of padding anywhere 
before the final non padding character = invalid byte at first +/// pad byte. +/// From this, we know padding must extend to the end of the input. #[apply(all_engines)] -fn decode_single_pad_byte_after_2_chars_in_trailing_quad_ok(engine_wrapper: E) { - let engine = E::standard(); +fn decode_padding_before_final_non_padding_char_error_invalid_byte( + engine_wrapper: E, +) { + let mut rng = seeded_rng(); - for num_prefix_quads in 0..50 { - let mut s: String = "ABCD".repeat(num_prefix_quads); - // weird padding, but should be allowed - s.push_str("Zg="); + // the different amounts of proper padding, w/ offset from end for the last non-padding char + let suffixes = vec![("/w==", 2), ("iYu=", 1), ("zzzz", 0)]; - let input_len = num_prefix_quads * 3 + 1; + let prefix_quads_range = distributions::Uniform::from(0..=256); - assert_eq!(input_len, engine.decode_ez_str_vec(&s).unwrap().len()); + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + + for _ in 0..100_000 { + for (suffix, offset) in suffixes.iter() { + let mut s = "ABCD".repeat(prefix_quads_range.sample(&mut rng)); + s.push_str(suffix); + let mut encoded = s.into_bytes(); + + // calculate a range to write padding into that leaves at least one non padding char + let last_non_padding_offset = encoded.len() - 1 - offset; + + // don't include last non padding char as it must stay not padding + let padding_end = rng.gen_range(0..last_non_padding_offset); + + // don't use more than 100 bytes of padding, but also use shorter lengths when + // padding_end is near the start of the encoded data to avoid biasing to padding + // the entire prefix on short lengths + let padding_len = rng.gen_range(1..=usize::min(100, padding_end + 1)); + let padding_start = padding_end.saturating_sub(padding_len); + + encoded[padding_start..=padding_end].fill(PAD_BYTE); + + assert_eq!( + Err(DecodeError::InvalidByte(padding_start, PAD_BYTE)), + 
engine.decode_ez_vec(&encoded), + ); + } + } + } +} + +/// Any amount of padding before final chunk that crosses over into final chunk with 1-4 bytes = +/// invalid byte at first pad byte (except for 1 byte suffix = invalid length). +/// From this we know the padding must start in the final chunk. +#[apply(all_engines)] +fn decode_padding_starts_before_final_chunk_error_invalid_byte( + engine_wrapper: E, +) { + let mut rng = seeded_rng(); + + // must have at least one prefix quad + let prefix_quads_range = distributions::Uniform::from(1..256); + // including 1 just to make sure that it really does produce invalid length + let suffix_pad_len_range = distributions::Uniform::from(1..=4); + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + for _ in 0..100_000 { + let suffix_len = suffix_pad_len_range.sample(&mut rng); + let mut encoded = "ABCD" + .repeat(prefix_quads_range.sample(&mut rng)) + .into_bytes(); + encoded.resize(encoded.len() + suffix_len, PAD_BYTE); + + // amount of padding must be long enough to extend back from suffix into previous + // quads + let padding_len = rng.gen_range(suffix_len + 1..encoded.len()); + // no non-padding after padding in this test, so padding goes to the end + let padding_start = encoded.len() - padding_len; + encoded[padding_start..].fill(PAD_BYTE); + + if suffix_len == 1 { + assert_eq!( + Err(DecodeError::InvalidLength), + engine.decode_ez_vec(&encoded), + ); + } else { + assert_eq!( + Err(DecodeError::InvalidByte(padding_start, PAD_BYTE)), + engine.decode_ez_vec(&encoded), + ); + } + } + } +} + +/// 0-1 bytes of data before any amount of padding in final chunk = invalid byte, since padding +/// is not valid data (consistent with error for pad bytes in earlier chunks). 
+/// From this we know there must be 2-3 bytes of data before padding +#[apply(all_engines)] +fn decode_too_little_data_before_padding_error_invalid_byte(engine_wrapper: E) { + let mut rng = seeded_rng(); + + // want to test no prefix quad case, so start at 0 + let prefix_quads_range = distributions::Uniform::from(0_usize..256); + let suffix_data_len_range = distributions::Uniform::from(0_usize..=1); + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + for _ in 0..100_000 { + let suffix_data_len = suffix_data_len_range.sample(&mut rng); + let prefix_quad_len = prefix_quads_range.sample(&mut rng); + + // ensure there is a suffix quad + let min_padding = usize::from(suffix_data_len == 0); + + // for all possible padding lengths + for padding_len in min_padding..=(4 - suffix_data_len) { + let mut encoded = "ABCD".repeat(prefix_quad_len).into_bytes(); + encoded.resize(encoded.len() + suffix_data_len, b'A'); + encoded.resize(encoded.len() + padding_len, PAD_BYTE); + + if suffix_data_len + padding_len == 1 { + assert_eq!( + Err(DecodeError::InvalidLength), + engine.decode_ez_vec(&encoded), + ); + } else { + assert_eq!( + Err(DecodeError::InvalidByte( + prefix_quad_len * 4 + suffix_data_len, + PAD_BYTE, + )), + engine.decode_ez_vec(&encoded), + "suffix data len {} pad len {}", + suffix_data_len, + padding_len + ); + } + } + } + } +} + +/// Requires canonical padding -> accepts 2 + 2, 3 + 1, 4 + 0 final quad configurations +#[apply(all_engines)] +fn decode_pad_mode_requires_canonical_accepts_canonical(engine_wrapper: E) { + assert_all_suffixes_ok( + E::standard_with_pad_mode(true, DecodePaddingMode::RequireCanonical), + vec!["/w==", "iYU=", "AAAA"], + ); +} + +/// Requires canonical padding -> rejects 2 + 0-1, 3 + 0 final chunk configurations +#[apply(all_engines)] +fn decode_pad_mode_requires_canonical_rejects_non_canonical(engine_wrapper: E) { + let engine = 
E::standard_with_pad_mode(true, DecodePaddingMode::RequireCanonical); + + let suffixes = vec!["/w", "/w=", "iYU"]; + for num_prefix_quads in 0..256 { + for &suffix in suffixes.iter() { + let mut encoded = "AAAA".repeat(num_prefix_quads); + encoded.push_str(suffix); + + let res = engine.decode_ez_str_vec(&encoded); + + assert_eq!(Err(DecodeError::InvalidPadding), res); + } + } +} + +/// Requires no padding -> accepts 2 + 0, 3 + 0, 4 + 0 final chunk configuration +#[apply(all_engines)] +fn decode_pad_mode_requires_no_padding_accepts_no_padding(engine_wrapper: E) { + assert_all_suffixes_ok( + E::standard_with_pad_mode(true, DecodePaddingMode::RequireNone), + vec!["/w", "iYU", "AAAA"], + ); +} + +/// Requires no padding -> rejects 2 + 1-2, 3 + 1 final chunk configuration +#[apply(all_engines)] +fn decode_pad_mode_requires_no_padding_rejects_any_padding(engine_wrapper: E) { + let engine = E::standard_with_pad_mode(true, DecodePaddingMode::RequireNone); + + let suffixes = vec!["/w=", "/w==", "iYU="]; + for num_prefix_quads in 0..256 { + for &suffix in suffixes.iter() { + let mut encoded = "AAAA".repeat(num_prefix_quads); + encoded.push_str(suffix); + + let res = engine.decode_ez_str_vec(&encoded); + + assert_eq!(Err(DecodeError::InvalidPadding), res); + } } } +/// Indifferent padding accepts 2 + 0-2, 3 + 0-1, 4 + 0 final chunk configuration +#[apply(all_engines)] +fn decode_pad_mode_indifferent_padding_accepts_anything(engine_wrapper: E) { + assert_all_suffixes_ok( + E::standard_with_pad_mode(true, DecodePaddingMode::Indifferent), + vec!["/w", "/w=", "/w==", "iYU", "iYU=", "AAAA"], + ); +} + //this is a MAY in the rfc: https://tools.ietf.org/html/rfc4648#section-3.3 #[apply(all_engines)] fn decode_pad_byte_in_penultimate_quad_error(engine_wrapper: E) { - let engine = E::standard(); + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + + for num_prefix_quads in 0..256 { + // leave 
room for at least one pad byte in penultimate quad + for num_valid_bytes_penultimate_quad in 0..4 { + // can't have 1 or it would be invalid length + for num_pad_bytes_in_final_quad in 2..=4 { + let mut s: String = "ABCD".repeat(num_prefix_quads); + + // varying amounts of padding in the penultimate quad + for _ in 0..num_valid_bytes_penultimate_quad { + s.push('A'); + } + // finish penultimate quad with padding + for _ in num_valid_bytes_penultimate_quad..4 { + s.push('='); + } + // and more padding in the final quad + for _ in 0..num_pad_bytes_in_final_quad { + s.push('='); + } + + // padding should be an invalid byte before the final quad. + // Could argue that the *next* padding byte (in the next quad) is technically the first + // erroneous one, but reporting that accurately is more complex and probably nobody cares + assert_eq!( + DecodeError::InvalidByte( + num_prefix_quads * 4 + num_valid_bytes_penultimate_quad, + b'=', + ), + engine.decode_ez_str_vec(&s).unwrap_err() + ); + } + } + } + } +} + +#[apply(all_engines)] +fn decode_bytes_after_padding_in_final_quad_error(engine_wrapper: E) { + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); - for num_prefix_quads in 0..50 { - for num_valid_bytes_penultimate_quad in 0..4 { - // can't have 1 or it would be invalid length - for num_pad_bytes_in_final_quad in 2..4 { + for num_prefix_quads in 0..256 { + // leave at least one byte in the quad for padding + for bytes_after_padding in 1..4 { let mut s: String = "ABCD".repeat(num_prefix_quads); - // varying amounts of padding in the penultimate quad - for _ in 0..num_valid_bytes_penultimate_quad { + // every invalid padding position with a 3-byte final quad: 1 to 3 bytes after padding + for _ in 0..(3 - bytes_after_padding) { s.push('A'); } - // finish penultimate quad with padding - for _ in num_valid_bytes_penultimate_quad..4 { - s.push('='); - } - // and more padding in the 
final quad - for _ in 0..num_pad_bytes_in_final_quad { - s.push('='); + s.push('='); + for _ in 0..bytes_after_padding { + s.push('A'); } - // padding should be an invalid byte before the final quad. - // Could argue that the *next* padding byte (in the next quad) is technically the first - // erroneous one, but reporting that accurately is more complex and probably nobody cares + // First (and only) padding byte is invalid. assert_eq!( DecodeError::InvalidByte( - num_prefix_quads * 4 + num_valid_bytes_penultimate_quad, - b'=', + num_prefix_quads * 4 + (3 - bytes_after_padding), + b'=' ), engine.decode_ez_str_vec(&s).unwrap_err() ); @@ -598,68 +827,48 @@ fn decode_pad_byte_in_penultimate_quad_error(engine_wrapper: E } #[apply(all_engines)] -fn decode_bytes_after_padding_in_final_quad_error(engine_wrapper: E) { - let engine = E::standard(); +fn decode_absurd_pad_error(engine_wrapper: E) { + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); - for num_prefix_quads in 0..50 { - for bytes_after_padding in 1..4 { + for num_prefix_quads in 0..256 { let mut s: String = "ABCD".repeat(num_prefix_quads); + s.push_str("==Y=Wx===pY=2U====="); - // every invalid padding position with a 3-byte final quad: 1 to 3 bytes after padding - for _ in 0..(3 - bytes_after_padding) { - s.push('A'); - } - s.push('='); - for _ in 0..bytes_after_padding { - s.push('A'); - } - - // First (and only) padding byte is invalid. 
+ // first padding byte assert_eq!( - DecodeError::InvalidByte(num_prefix_quads * 4 + (3 - bytes_after_padding), b'='), + DecodeError::InvalidByte(num_prefix_quads * 4, b'='), engine.decode_ez_str_vec(&s).unwrap_err() ); } } } -#[apply(all_engines)] -fn decode_absurd_pad_error(engine_wrapper: E) { - let engine = E::standard(); - - for num_prefix_quads in 0..50 { - let mut s: String = "ABCD".repeat(num_prefix_quads); - s.push_str("==Y=Wx===pY=2U====="); - - // first padding byte - assert_eq!( - DecodeError::InvalidByte(num_prefix_quads * 4, b'='), - engine.decode_ez_str_vec(&s).unwrap_err() - ); - } -} - #[apply(all_engines)] fn decode_too_much_padding_returns_error(engine_wrapper: E) { - let engine = E::standard(); + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); - for num_prefix_quads in 0..50 { - // add enough padding to ensure that we'll hit all decode stages at the different lengths - for pad_bytes in 1..64 { - let mut s: String = "ABCD".repeat(num_prefix_quads); - let padding: String = "=".repeat(pad_bytes); - s.push_str(&padding); - - if pad_bytes % 4 == 1 { - assert_eq!( - DecodeError::InvalidLength, - engine.decode_ez_str_vec(&s).unwrap_err() - ); - } else { - assert_eq!( - DecodeError::InvalidByte(num_prefix_quads * 4, b'='), - engine.decode_ez_str_vec(&s).unwrap_err() - ); + for num_prefix_quads in 0..256 { + // add enough padding to ensure that we'll hit all decode stages at the different lengths + for pad_bytes in 1..=64 { + let mut s: String = "ABCD".repeat(num_prefix_quads); + let padding: String = "=".repeat(pad_bytes); + s.push_str(&padding); + + if pad_bytes % 4 == 1 { + assert_eq!( + DecodeError::InvalidLength, + engine.decode_ez_str_vec(&s).unwrap_err() + ); + } else { + assert_eq!( + DecodeError::InvalidByte(num_prefix_quads * 4, b'='), + engine.decode_ez_str_vec(&s).unwrap_err() + ); + } } } } @@ -667,25 +876,28 @@ fn 
decode_too_much_padding_returns_error(engine_wrapper: E) { #[apply(all_engines)] fn decode_padding_followed_by_non_padding_returns_error(engine_wrapper: E) { - let engine = E::standard(); - - for num_prefix_quads in 0..50 { - for pad_bytes in 0..32 { - let mut s: String = "ABCD".repeat(num_prefix_quads); - let padding: String = "=".repeat(pad_bytes); - s.push_str(&padding); - s.push('E'); + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); - if pad_bytes % 4 == 0 { - assert_eq!( - DecodeError::InvalidLength, - engine.decode_ez_str_vec(&s).unwrap_err() - ); - } else { - assert_eq!( - DecodeError::InvalidByte(num_prefix_quads * 4, b'='), - engine.decode_ez_str_vec(&s).unwrap_err() - ); + for num_prefix_quads in 0..256 { + for pad_bytes in 0..=32 { + let mut s: String = "ABCD".repeat(num_prefix_quads); + let padding: String = "=".repeat(pad_bytes); + s.push_str(&padding); + s.push('E'); + + if pad_bytes % 4 == 0 { + assert_eq!( + DecodeError::InvalidLength, + engine.decode_ez_str_vec(&s).unwrap_err() + ); + } else { + assert_eq!( + DecodeError::InvalidByte(num_prefix_quads * 4, b'='), + engine.decode_ez_str_vec(&s).unwrap_err() + ); + } } } } @@ -693,65 +905,72 @@ fn decode_padding_followed_by_non_padding_returns_error(engine #[apply(all_engines)] fn decode_one_char_in_final_quad_with_padding_error(engine_wrapper: E) { - let engine = E::standard(); + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); - for num_prefix_quads in 0..50 { - let mut s: String = "ABCD".repeat(num_prefix_quads); - s.push_str("E="); + for num_prefix_quads in 0..256 { + let mut s: String = "ABCD".repeat(num_prefix_quads); + s.push_str("E="); - assert_eq!( - DecodeError::InvalidByte(num_prefix_quads * 4 + 1, b'='), - engine.decode_ez_str_vec(&s).unwrap_err() - ); + assert_eq!( + 
DecodeError::InvalidByte(num_prefix_quads * 4 + 1, b'='), + engine.decode_ez_str_vec(&s).unwrap_err() + ); - // more padding doesn't change the error - s.push('='); - assert_eq!( - DecodeError::InvalidByte(num_prefix_quads * 4 + 1, b'='), - engine.decode_ez_str_vec(&s).unwrap_err() - ); + // more padding doesn't change the error + s.push('='); + assert_eq!( + DecodeError::InvalidByte(num_prefix_quads * 4 + 1, b'='), + engine.decode_ez_str_vec(&s).unwrap_err() + ); - s.push('='); - assert_eq!( - DecodeError::InvalidByte(num_prefix_quads * 4 + 1, b'='), - engine.decode_ez_str_vec(&s).unwrap_err() - ); + s.push('='); + assert_eq!( + DecodeError::InvalidByte(num_prefix_quads * 4 + 1, b'='), + engine.decode_ez_str_vec(&s).unwrap_err() + ); + } } } #[apply(all_engines)] fn decode_too_few_symbols_in_final_quad_error(engine_wrapper: E) { - let engine = E::standard(); - - for num_prefix_quads in 0..50 { - for final_quad_symbols in 0..2 { - for padding_symbols in 0..(4 - final_quad_symbols) { - let mut s: String = "ABCD".repeat(num_prefix_quads); - - for _ in 0..final_quad_symbols { - s.push('A'); - } - for _ in 0..padding_symbols { - s.push('='); - } - - match final_quad_symbols + padding_symbols { - 0 => continue, - 1 => { - assert_eq!( - DecodeError::InvalidLength, - engine.decode_ez_str_vec(&s).unwrap_err() - ); + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + + for num_prefix_quads in 0..256 { + // <2 is invalid + for final_quad_symbols in 0..2 { + for padding_symbols in 0..=(4 - final_quad_symbols) { + let mut s: String = "ABCD".repeat(num_prefix_quads); + + for _ in 0..final_quad_symbols { + s.push('A'); } - _ => { - // error reported at first padding byte - assert_eq!( - DecodeError::InvalidByte( - num_prefix_quads * 4 + final_quad_symbols, - b'=', - ), - engine.decode_ez_str_vec(&s).unwrap_err() - ); + for _ in 0..padding_symbols { + s.push('='); + } + + match 
final_quad_symbols + padding_symbols { + 0 => continue, + 1 => { + assert_eq!( + DecodeError::InvalidLength, + engine.decode_ez_str_vec(&s).unwrap_err() + ); + } + _ => { + // error reported at first padding byte + assert_eq!( + DecodeError::InvalidByte( + num_prefix_quads * 4 + final_quad_symbols, + b'=', + ), + engine.decode_ez_str_vec(&s).unwrap_err() + ); + } } } } @@ -761,39 +980,80 @@ fn decode_too_few_symbols_in_final_quad_error(engine_wrapper: #[apply(all_engines)] fn decode_invalid_trailing_bytes(engine_wrapper: E) { - let engine = E::standard(); + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); - for num_prefix_quads in 0..50 { - let mut s: String = "ABCD".repeat(num_prefix_quads); - s.push_str("Cg==\n"); + for num_prefix_quads in 0..256 { + let mut s: String = "ABCD".repeat(num_prefix_quads); + s.push_str("Cg==\n"); - // The case of trailing newlines is common enough to warrant a test for a good error - // message. - assert_eq!( - Err(DecodeError::InvalidByte(num_prefix_quads * 4 + 4, b'\n')), - engine.decode_ez_str_vec(&s) - ); + // The case of trailing newlines is common enough to warrant a test for a good error + // message. 
+ assert_eq!( + Err(DecodeError::InvalidByte(num_prefix_quads * 4 + 4, b'\n')), + engine.decode_ez_str_vec(&s) + ); - // extra padding, however, is still InvalidLength - let s = s.replace('\n', "="); - assert_eq!( - Err(DecodeError::InvalidLength), - engine.decode_ez_str_vec(&s) - ); + // extra padding, however, is still InvalidLength + let s = s.replace('\n', "="); + assert_eq!( + Err(DecodeError::InvalidLength), + engine.decode_ez_str_vec(&s) + ); + } + } +} + +#[apply(all_engines)] +fn decode_wrong_length_error(engine_wrapper: E) { + let engine = E::standard_with_pad_mode(true, DecodePaddingMode::Indifferent); + + for num_prefix_quads in 0..256 { + // at least one token, otherwise it wouldn't be a final quad + for num_tokens_final_quad in 1..=4 { + for num_padding in 0..=(4 - num_tokens_final_quad) { + let mut s: String = "IIII".repeat(num_prefix_quads); + for _ in 0..num_tokens_final_quad { + s.push('g'); + } + for _ in 0..num_padding { + s.push('='); + } + + let res = engine.decode_ez_str_vec(&s); + if num_tokens_final_quad >= 2 { + assert!(res.is_ok()); + } else if num_tokens_final_quad == 1 && num_padding > 0 { + // = is invalid if it's too early + assert_eq!( + Err(DecodeError::InvalidByte( + num_prefix_quads * 4 + num_tokens_final_quad, + 61 + )), + res + ); + } else if num_padding > 2 { + assert_eq!(Err(DecodeError::InvalidPadding), res); + } else { + assert_eq!(Err(DecodeError::InvalidLength), res); + } + } + } } } /// Returns a tuple of the original data length, the encoded data length (just data), and the length including padding. /// /// Vecs provided should be empty. 
-fn generate_random_encoded_data>( +fn generate_random_encoded_data>( engine: &E, orig_data: &mut Vec, encode_buf: &mut Vec, rng: &mut R, length_distribution: &D, ) -> (usize, usize, usize) { - let padding: bool = rng.gen(); + let padding: bool = engine.config().encode_padding(); let orig_len = fill_rand(orig_data, rng, length_distribution); let expected_encoded_len = encode::encoded_len(orig_len, padding).unwrap(); @@ -812,13 +1072,8 @@ fn generate_random_encoded_data>( (orig_len, base_encoded_len, enc_len_with_padding) } -fn engine_encoded_len(len: usize) -> usize { - // engines don't pad - encode::encoded_len(len, false).unwrap() -} - // fill to a random length -fn fill_rand>( +fn fill_rand>( vec: &mut Vec, rng: &mut R, length_distribution: &D, @@ -831,7 +1086,7 @@ fn fill_rand>( len } -fn fill_rand_len(vec: &mut Vec, rng: &mut R, len: usize) { +fn fill_rand_len(vec: &mut Vec, rng: &mut R, len: usize) { for _ in 0..len { vec.push(rng.gen()); } @@ -856,14 +1111,23 @@ trait EngineWrapper { /// Return an engine configured for RFC standard base64 fn standard() -> Self::Engine; + /// Return an engine configured for RFC standard base64, except with no padding appended on + /// encode, and required no padding on decode. 
+ fn standard_unpadded() -> Self::Engine; + + /// Return an engine configured for RFC standard alphabet with the provided encode and decode + /// pad settings + fn standard_with_pad_mode(encode_pad: bool, decode_pad_mode: DecodePaddingMode) + -> Self::Engine; + /// Return an engine configured for RFC standard base64 that allows invalid trailing bits - fn standard_forgiving() -> Self::Engine; + fn standard_allow_trailing_bits() -> Self::Engine; /// Return an engine configured with a randomized alphabet and config - fn random(rng: &mut R) -> Self::Engine; + fn random(rng: &mut R) -> Self::Engine; /// Return an engine configured with the specified alphabet and randomized config - fn random_alphabet(rng: &mut R, alphabet: &Alphabet) -> Self::Engine; + fn random_alphabet(rng: &mut R, alphabet: &Alphabet) -> Self::Engine; } struct FastPortableWrapper {} @@ -875,25 +1139,40 @@ impl EngineWrapper for FastPortableWrapper { fast_portable::FastPortable::from(&STANDARD, fast_portable::PAD) } - fn standard_forgiving() -> Self::Engine { + fn standard_unpadded() -> Self::Engine { + fast_portable::FastPortable::from( + &STANDARD, + fast_portable::NO_PAD.with_decode_padding_mode(DecodePaddingMode::RequireNone), + ) + } + + fn standard_with_pad_mode( + encode_pad: bool, + decode_pad_mode: DecodePaddingMode, + ) -> Self::Engine { + fast_portable::FastPortable::from( + &STANDARD, + fast_portable::FastPortableConfig::new() + .with_encode_padding(encode_pad) + .with_decode_padding_mode(decode_pad_mode), + ) + } + + fn standard_allow_trailing_bits() -> Self::Engine { fast_portable::FastPortable::from( &STANDARD, fast_portable::FastPortableConfig::new().with_decode_allow_trailing_bits(true), ) } - fn random(rng: &mut R) -> Self::Engine { + fn random(rng: &mut R) -> Self::Engine { let alphabet = random_alphabet(rng); Self::random_alphabet(rng, alphabet) } - fn random_alphabet(rng: &mut R, alphabet: &Alphabet) -> Self::Engine { - let config = fast_portable::FastPortableConfig::new() - 
.with_encode_padding(rng.gen()) - .with_decode_allow_trailing_bits(rng.gen()); - - fast_portable::FastPortable::from(alphabet, config) + fn random_alphabet(rng: &mut R, alphabet: &Alphabet) -> Self::Engine { + fast_portable::FastPortable::from(alphabet, random_config(rng)) } } @@ -906,32 +1185,66 @@ impl EngineWrapper for NaiveWrapper { naive::Naive::from( &STANDARD, naive::NaiveConfig { - padding: true, + encode_padding: true, + decode_allow_trailing_bits: false, + decode_padding_mode: DecodePaddingMode::RequireCanonical, + }, + ) + } + + fn standard_unpadded() -> Self::Engine { + naive::Naive::from( + &STANDARD, + naive::NaiveConfig { + encode_padding: false, decode_allow_trailing_bits: false, + decode_padding_mode: DecodePaddingMode::RequireNone, }, ) } - fn standard_forgiving() -> Self::Engine { + fn standard_with_pad_mode( + encode_pad: bool, + decode_pad_mode: DecodePaddingMode, + ) -> Self::Engine { naive::Naive::from( &STANDARD, naive::NaiveConfig { - padding: true, + encode_padding: false, + decode_allow_trailing_bits: false, + decode_padding_mode: decode_pad_mode, + }, + ) + } + + fn standard_allow_trailing_bits() -> Self::Engine { + naive::Naive::from( + &STANDARD, + naive::NaiveConfig { + encode_padding: true, decode_allow_trailing_bits: true, + decode_padding_mode: DecodePaddingMode::RequireCanonical, }, ) } - fn random(rng: &mut R) -> Self::Engine { + fn random(rng: &mut R) -> Self::Engine { let alphabet = random_alphabet(rng); Self::random_alphabet(rng, alphabet) } - fn random_alphabet(rng: &mut R, alphabet: &Alphabet) -> Self::Engine { + fn random_alphabet(rng: &mut R, alphabet: &Alphabet) -> Self::Engine { + let mode = rng.gen(); + let config = naive::NaiveConfig { - padding: rng.gen(), + encode_padding: match mode { + DecodePaddingMode::Indifferent => rng.gen(), + DecodePaddingMode::RequireCanonical => true, + DecodePaddingMode::RequireNone => false, + }, decode_allow_trailing_bits: rng.gen(), + decode_padding_mode: mode, }; 
naive::Naive::from(alphabet, config) @@ -970,3 +1283,27 @@ trait EngineExtensions: Engine { } impl EngineExtensions for E {} + +fn seeded_rng() -> impl rand::Rng { + rngs::SmallRng::from_entropy() +} + +fn all_pad_modes() -> Vec { + vec![ + DecodePaddingMode::Indifferent, + DecodePaddingMode::RequireCanonical, + DecodePaddingMode::RequireNone, + ] +} + +fn assert_all_suffixes_ok(engine: E, suffixes: Vec<&str>) { + for num_prefix_quads in 0..256 { + for &suffix in suffixes.iter() { + let mut encoded = "AAAA".repeat(num_prefix_quads); + encoded.push_str(suffix); + + let res = &engine.decode_ez_str_vec(&encoded); + assert!(res.is_ok()); + } + } +} diff --git a/src/read/decoder.rs b/src/read/decoder.rs index 014d1b0..13370bb 100644 --- a/src/read/decoder.rs +++ b/src/read/decoder.rs @@ -144,6 +144,7 @@ impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> { DecodeError::InvalidLastSymbol(offset, byte) => { DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte) } + DecodeError::InvalidPadding => DecodeError::InvalidPadding, }) .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; diff --git a/src/tests.rs b/src/tests.rs index f492fa1..bb5458f 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1,15 +1,22 @@ -use crate::{alphabet, decode_engine, encode::encoded_len, encode_engine_string}; - use std::str; -use crate::engine::fast_portable::{FastPortable, FastPortableConfig}; -use crate::engine::{Config, Engine}; use rand::{ - distributions::{Distribution, Uniform}, + distributions, + distributions::{Distribution as _, Uniform}, seq::SliceRandom, Rng, SeedableRng, }; +use crate::{ + alphabet, decode_engine, + encode::encoded_len, + encode_engine_string, + engine::{ + fast_portable::{FastPortable, FastPortableConfig}, + Config, DecodePaddingMode, Engine, + }, +}; + #[test] fn roundtrip_random_config_short() { // exercise the slower encode/decode routines that operate on shorter buffers more vigorously @@ -70,11 +77,27 @@ fn 
roundtrip_random_config(input_len_range: Uniform, iterations: u32) { } pub fn random_config(rng: &mut R) -> FastPortableConfig { + let mode = rng.gen(); FastPortableConfig::new() - .with_encode_padding(rng.gen()) + .with_encode_padding(match mode { + DecodePaddingMode::Indifferent => rng.gen(), + DecodePaddingMode::RequireCanonical => true, + DecodePaddingMode::RequireNone => false, + }) + .with_decode_padding_mode(mode) .with_decode_allow_trailing_bits(rng.gen()) } +impl distributions::Distribution for distributions::Standard { + fn sample(&self, rng: &mut R) -> DecodePaddingMode { + match rng.gen_range(0..=2) { + 0 => DecodePaddingMode::Indifferent, + 1 => DecodePaddingMode::RequireCanonical, + _ => DecodePaddingMode::RequireNone, + } + } +} + pub fn random_alphabet(rng: &mut R) -> &'static alphabet::Alphabet { ALPHABETS.choose(rng).unwrap() } diff --git a/tests/decode.rs b/tests/decode.rs deleted file mode 100644 index 4054762..0000000 --- a/tests/decode.rs +++ /dev/null @@ -1,84 +0,0 @@ -use base64::engine::fast_portable::{FastPortable, NO_PAD}; -use base64::engine::DEFAULT_ENGINE; -use base64::*; - -use self::helpers::*; - -mod helpers; - -#[test] -fn decode_rfc4648_0() { - compare_decode("", ""); -} - -#[test] -fn decode_rfc4648_1() { - compare_decode("f", "Zg=="); -} - -#[test] -fn decode_rfc4648_1_just_a_bit_of_padding() { - // allows less padding than required - compare_decode("f", "Zg="); -} - -#[test] -fn decode_rfc4648_1_no_padding() { - compare_decode("f", "Zg"); -} - -#[test] -fn decode_rfc4648_2() { - compare_decode("fo", "Zm8="); -} - -#[test] -fn decode_rfc4648_2_no_padding() { - compare_decode("fo", "Zm8"); -} - -#[test] -fn decode_rfc4648_3() { - compare_decode("foo", "Zm9v"); -} - -#[test] -fn decode_rfc4648_4() { - compare_decode("foob", "Zm9vYg=="); -} - -#[test] -fn decode_rfc4648_4_no_padding() { - compare_decode("foob", "Zm9vYg"); -} - -#[test] -fn decode_rfc4648_5() { - compare_decode("fooba", "Zm9vYmE="); -} - -#[test] -fn 
decode_rfc4648_5_no_padding() { - compare_decode("fooba", "Zm9vYmE"); -} - -#[test] -fn decode_rfc4648_6() { - compare_decode("foobar", "Zm9vYmFy"); -} - -#[test] -fn decode_reject_null() { - assert_eq!( - DecodeError::InvalidByte(3, 0x0), - decode_engine("YWx\0pY2U==", &DEFAULT_ENGINE).unwrap_err() - ); -} - -#[test] -fn decode_imap() { - assert_eq!( - decode_engine(b"+,,+", &FastPortable::from(&alphabet::IMAP_MUTF7, NO_PAD),), - decode_engine(b"+//+", &DEFAULT_ENGINE) - ); -} diff --git a/tests/encode.rs b/tests/encode.rs index 4301e46..80c19d2 100644 --- a/tests/encode.rs +++ b/tests/encode.rs @@ -1,46 +1,11 @@ use base64::alphabet::URL_SAFE; -use base64::engine::fast_portable::{NO_PAD, PAD}; +use base64::engine::fast_portable::PAD; use base64::*; fn compare_encode(expected: &str, target: &[u8]) { assert_eq!(expected, encode(target)); } -#[test] -fn encode_rfc4648_0() { - compare_encode("", b""); -} - -#[test] -fn encode_rfc4648_1() { - compare_encode("Zg==", b"f"); -} - -#[test] -fn encode_rfc4648_2() { - compare_encode("Zm8=", b"fo"); -} - -#[test] -fn encode_rfc4648_3() { - compare_encode("Zm9v", b"foo"); -} - -#[test] -fn encode_rfc4648_4() { - compare_encode("Zm9vYg==", b"foob"); -} - -#[test] -fn encode_rfc4648_5() { - compare_encode("Zm9vYmE=", b"fooba"); -} - -#[test] -fn encode_rfc4648_6() { - compare_encode("Zm9vYmFy", b"foobar"); -} - #[test] fn encode_all_ascii() { let mut ascii = Vec::::with_capacity(128); @@ -92,20 +57,7 @@ fn encode_all_bytes_url() { 8_T19vf4-fr7_P3-_w==", encode_engine( &bytes, - &engine::fast_portable::FastPortable::from(&URL_SAFE, PAD) + &engine::fast_portable::FastPortable::from(&URL_SAFE, PAD), ) ); } - -#[test] -fn encode_url_safe_without_padding() { - let encoded = encode_engine( - b"alice", - &engine::fast_portable::FastPortable::from(&URL_SAFE, NO_PAD), - ); - assert_eq!(&encoded, "YWxpY2U"); - assert_eq!( - String::from_utf8(decode(&encoded).unwrap()).unwrap(), - "alice" - ); -} diff --git a/tests/helpers.rs 
b/tests/helpers.rs deleted file mode 100644 index 47f7a53..0000000 --- a/tests/helpers.rs +++ /dev/null @@ -1,12 +0,0 @@ -use base64::*; - -pub fn compare_decode(expected: &str, target: &str) { - assert_eq!( - expected, - String::from_utf8(decode(target).unwrap()).unwrap() - ); - assert_eq!( - expected, - String::from_utf8(decode(target.as_bytes()).unwrap()).unwrap() - ); -} diff --git a/tests/tests.rs b/tests/tests.rs index ab54c90..ce2521b 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -3,12 +3,9 @@ use rand::{Rng, SeedableRng}; use base64::engine::{Engine, DEFAULT_ENGINE}; use base64::*; -use self::helpers::*; use base64::alphabet::STANDARD; use base64::engine::fast_portable::{FastPortable, NO_PAD}; -mod helpers; - // generate random contents of the specified length and test encode/decode roundtrip fn roundtrip_random( byte_buf: &mut Vec, @@ -123,13 +120,11 @@ fn roundtrip_decode_trailing_10_bytes() { let mut s: String = "ABCD".repeat(num_quads); s.push_str("EFGHIJKLZg"); - let decoded = decode(&s).unwrap(); + let engine = FastPortable::from(&STANDARD, NO_PAD); + let decoded = decode_engine(&s, &engine).unwrap(); assert_eq!(num_quads * 3 + 7, decoded.len()); - assert_eq!( - s, - encode_engine(&decoded, &FastPortable::from(&STANDARD, NO_PAD)) - ); + assert_eq!(s, encode_engine(&decoded, &engine)); } } @@ -148,42 +143,6 @@ fn display_wrapper_matches_normal_encode() { ); } -#[test] -fn because_we_can() { - compare_decode("alice", "YWxpY2U="); - compare_decode("alice", &encode(b"alice")); - compare_decode("alice", &encode(&decode(&encode(b"alice")).unwrap())); -} - -#[test] -fn encode_engine_slice_can_use_inline_buffer() { - let mut buf: [u8; 22] = [0; 22]; - let mut larger_buf: [u8; 24] = [0; 24]; - let mut input: [u8; 16] = [0; 16]; - - let engine = FastPortable::from(&STANDARD, NO_PAD); - - let mut rng = rand::rngs::SmallRng::from_entropy(); - for elt in &mut input { - *elt = rng.gen(); - } - - assert_eq!(22, encode_engine_slice(&input, &mut buf, &engine)); 
- let decoded = decode_engine(&buf, &engine).unwrap(); - - assert_eq!(decoded, input); - - // let's try it again with padding - - assert_eq!( - 24, - encode_engine_slice(&input, &mut larger_buf, &DEFAULT_ENGINE) - ); - let decoded = decode_engine(&buf, &DEFAULT_ENGINE).unwrap(); - - assert_eq!(decoded, input); -} - #[test] #[should_panic(expected = "index 24 out of range for slice of length 22")] fn encode_engine_slice_panics_when_buffer_too_small() { @@ -195,5 +154,5 @@ fn encode_engine_slice_panics_when_buffer_too_small() { *elt = rng.gen(); } - encode_engine_slice(&input, &mut buf, &DEFAULT_ENGINE); + encode_engine_slice(input, &mut buf, &DEFAULT_ENGINE); }