diff --git a/Cargo.toml b/Cargo.toml
index 2ce9eaa..f7e30b9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "base64"
-version = "0.20.0"
+version = "0.21.0-rc.1"
 authors = ["Alice Maz <alice@alicemaz.com>", "Marshall Pierce <marshall@mpierce.org>"]
 description = "encodes and decodes base64 as bytes or utf8"
 repository = "https://github.com/marshallpierce/rust-base64"
diff --git a/README.md b/README.md
index ba2b015..d7b0885 100644
--- a/README.md
+++ b/README.md
@@ -14,20 +14,6 @@ e.g. `decode_engine_slice` decodes into an existing `&mut [u8]` and is pretty fa
 whereas `decode_engine` allocates a new `Vec<u8>` and returns it, which might be more convenient in some cases, but is
 slower (although still fast enough for almost any purpose) at 2.1 GiB/s.
 
-## Example
-
-```rust
-use base64::{encode, decode};
-
-fn main() {
-    let a = b"hello world";
-    let b = "aGVsbG8gd29ybGQ=";
-
-    assert_eq!(encode(a), b);
-    assert_eq!(a, &decode(b).unwrap()[..]);
-}
-```
-
 See the [docs](https://docs.rs/base64) for all the details.
 
 ## FAQ
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 46f0489..4fcadda 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -1,22 +1,106 @@
+# 0.21.0
+
+(not yet released)
+
+
+## Migration
+
+### Functions
+
+| < 0.20 function         | 0.21 equivalent                                                                     |
+|-------------------------|-------------------------------------------------------------------------------------|
+| `encode()`              | `engine::general_purpose::STANDARD.encode()` or `prelude::BASE64_STANDARD.encode()` |
+| `encode_config()`       | `engine.encode()`                                                                   |
+| `encode_config_buf()`   | `engine.encode_string()`                                                            |
+| `encode_config_slice()` | `engine.encode_slice()`                                                             |
+| `decode()`              | `engine::general_purpose::STANDARD.decode()` or `prelude::BASE64_STANDARD.decode()` |
+| `decode_config()`       | `engine.decode()`                                                                   |
+| `decode_config_buf()`   | `engine.decode_vec()`                                                               |
+| `decode_config_slice()` | `engine.decode_slice()`                                                             |
+
+The short-lived 0.20 functions were the 0.13 functions with `config` replaced with `engine`.
+
+### Padding
+
+If applicable, use the preset engines in `engine::general_purpose`: `STANDARD`, `STANDARD_NO_PAD`, `URL_SAFE`,
+or `URL_SAFE_NO_PAD` (also available via `prelude` as `BASE64_STANDARD`, etc.).
+The `NO_PAD` ones require that padding is absent when decoding, and the others require that
+canonical padding is present.
+
+If you need the < 0.20 behavior that did not care about padding, or want to recreate < 0.20.0's predefined `Config`s
+precisely, see the following table.
+
+| 0.13.1 Config   | 0.20.0+ alphabet | `encode_padding` | `decode_padding_mode` |
+|-----------------|------------------|------------------|-----------------------|
+| STANDARD        | STANDARD         | true             | Indifferent           |
+| STANDARD_NO_PAD | STANDARD         | false            | Indifferent           |
+| URL_SAFE        | URL_SAFE         | true             | Indifferent           |
+| URL_SAFE_NO_PAD | URL_SAFE         | false            | Indifferent           |
+
+# 0.21.0-rc.1
+
+- Restore the ability to decode into a slice of precisely the correct length with `Engine.decode_slice_unchecked`.
+- Add `Engine` as a `pub use` in `prelude`.
+
+# 0.21.0-beta.2
+
+## Breaking changes
+
+- Re-exports of preconfigured engines in `engine` are removed in favor of `base64::prelude::...` that are better suited to those who wish to `use` the entire path to a name.
+
+# 0.21.0-beta.1
+
+## Breaking changes
+
+- `FastPortable` was only meant to be an interim name, and shouldn't have shipped in 0.20. It is now `GeneralPurpose` to
+  make its intended usage more clear.
+- `GeneralPurpose` and its config are now `pub use`'d in the `engine` module for convenience.
+- Change a few `from()` functions to be `new()`. `from()` causes confusing compiler errors because it is ambiguous
+  with `From::from`, and is a little misleading because some of those invocations are not as cheap as one would
+  usually expect from a `from` call.
+- `encode*` and `decode*` top level functions are now methods on `Engine`.
+- `DEFAULT_ENGINE` was replaced by `engine::general_purpose::STANDARD`
+- Predefined engine consts `engine::general_purpose::{STANDARD, STANDARD_NO_PAD, URL_SAFE, URL_SAFE_NO_PAD}`
+    - These are `pub use`d into `engine` as well
+- The `*_slice` decode/encode functions now return an error instead of panicking when the output slice is too small
+    - As part of this, there is currently no public way to decode into a slice _exactly_ the size needed for inputs
+      that aren't multiples of 4 tokens. If adding up to 2 bytes to always be a multiple of 3 bytes for the decode
+      buffer is a problem, file an issue.
+
+## Other changes
+
+- `decoded_len_estimate()` is provided to make it easy to size decode buffers correctly.
+
 # 0.20.0
 
-### Breaking changes
+## Breaking changes
 
 - Update MSRV to 1.57.0
-- Decoding can now either ignore padding, require correct padding, or require no padding. The default is to require correct padding.
-  - The `NO_PAD` config now requires that padding be absent when decoding.
+- Decoding can now either ignore padding, require correct padding, or require no padding. The default is to require
+  correct padding.
+    - The `NO_PAD` config now requires that padding be absent when decoding.
 
 ## 0.20.0-alpha.1
 
 ### Breaking changes
-- Extended the `Config` concept into the `Engine` abstraction, allowing the user to pick different encoding / decoding implementations.
-  - What was formerly the only algorithm is now the `FastPortable` engine, so named because it's portable (works on any CPU) and relatively fast.
-  - This opens the door to a portable constant-time implementation ([#153](https://github.com/marshallpierce/rust-base64/pull/153), presumably `ConstantTimePortable`?) for security-sensitive applications that need side-channel resistance, and CPU-specific SIMD implementations for  more speed.
-  - Standard base64 per the RFC is available via `DEFAULT_ENGINE`. To use different alphabets or other settings (padding, etc), create your own engine instance.
-- `CharacterSet` is now `Alphabet` (per the RFC), and allows creating custom alphabets. The corresponding tables that were previously code-generated are now built dynamically.
-- Since there are already multiple breaking changes, various functions are renamed to be more consistent and discoverable.
+
+- Extended the `Config` concept into the `Engine` abstraction, allowing the user to pick different encoding / decoding
+  implementations.
+    - What was formerly the only algorithm is now the `FastPortable` engine, so named because it's portable (works on
+      any CPU) and relatively fast.
+    - This opens the door to a portable constant-time
+      implementation ([#153](https://github.com/marshallpierce/rust-base64/pull/153),
+      presumably `ConstantTimePortable`?) for security-sensitive applications that need side-channel resistance, and
+      CPU-specific SIMD implementations for more speed.
+    - Standard base64 per the RFC is available via `DEFAULT_ENGINE`. To use different alphabets or other settings
+      (padding, etc), create your own engine instance.
+- `CharacterSet` is now `Alphabet` (per the RFC), and allows creating custom alphabets. The corresponding tables that
+  were previously code-generated are now built dynamically.
+- Since there are already multiple breaking changes, various functions are renamed to be more consistent and
+  discoverable.
 - MSRV is now 1.47.0 to allow various things to use `const fn`.
-- `DecoderReader` now owns its inner reader, and can expose it via `into_inner()`. For symmetry, `EncoderWriter` can do the same with its writer.
+- `DecoderReader` now owns its inner reader, and can expose it via `into_inner()`. For symmetry, `EncoderWriter` can do
+  the same with its writer.
 - `encoded_len` is now public so you can size encode buffers precisely.
 
 # 0.13.1
@@ -28,8 +112,11 @@
 - Config methods are const
 - Added `EncoderStringWriter` to allow encoding directly to a String
 - `EncoderWriter` now owns its delegate writer rather than keeping a reference to it (though refs still work)
-    - As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which returns `Result<W>` instead of `Result<()>`. If you were calling `finish()` explicitly, you will now need to use `let _ = foo.finish()` instead of just `foo.finish()` to avoid a warning about the unused value.
-- When decoding input that has both an invalid length and an invalid symbol as the last byte, `InvalidByte` will be emitted instead of `InvalidLength` to make the problem more obvious.
+    - As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which
+      returns `Result<W>` instead of `Result<()>`. If you were calling `finish()` explicitly, you will now need to
+      use `let _ = foo.finish()` instead of just `foo.finish()` to avoid a warning about the unused value.
+- When decoding input that has both an invalid length and an invalid symbol as the last byte, `InvalidByte` will be
+  emitted instead of `InvalidLength` to make the problem more obvious.
 
 # 0.12.2
 
@@ -47,23 +134,31 @@
 - A minor performance improvement in encoding
 
 # 0.11.0
+
 - Minimum rust version 1.34.0
 - `no_std` is now supported via the two new features `alloc` and `std`.
 
 # 0.10.1
 
 - Minimum rust version 1.27.2
-- Fix bug in streaming encoding ([#90](https://github.com/marshallpierce/rust-base64/pull/90)): if the underlying writer didn't write all the bytes given to it, the remaining bytes would not be retried later. See the docs on `EncoderWriter::write`.
+- Fix bug in streaming encoding ([#90](https://github.com/marshallpierce/rust-base64/pull/90)): if the underlying writer
+  didn't write all the bytes given to it, the remaining bytes would not be retried later. See the docs
+  on `EncoderWriter::write`.
 - Make it configurable whether or not to return an error when decoding detects excess trailing bits.
 
 # 0.10.0
 
-- Remove line wrapping. Line wrapping was never a great conceptual fit in this library, and other features (streaming encoding, etc) either couldn't support it or could support only special cases of it with a great increase in complexity. Line wrapping has been pulled out into a [line-wrap](https://crates.io/crates/line-wrap) crate, so it's still available if you need it.
-  - `Base64Display` creation no longer uses a `Result` because it can't fail, which means its helper methods for common
-  configs that `unwrap()` for you are no longer needed
+- Remove line wrapping. Line wrapping was never a great conceptual fit in this library, and other features (streaming
+  encoding, etc) either couldn't support it or could support only special cases of it with a great increase in
+  complexity. Line wrapping has been pulled out into a [line-wrap](https://crates.io/crates/line-wrap) crate, so it's
+  still available if you need it.
+    - `Base64Display` creation no longer uses a `Result` because it can't fail, which means its helper methods for
+      common
+      configs that `unwrap()` for you are no longer needed
 - Add a streaming encoder `Write` impl to transparently base64 as you write.
 - Remove the remaining `unsafe` code.
-- Remove whitespace stripping to simplify `no_std` support. No out of the box configs use it, and it's trivial to do yourself if needed: `filter(|b| !b" \n\t\r\x0b\x0c".contains(b)`.
+- Remove whitespace stripping to simplify `no_std` support. No out of the box configs use it, and it's trivial to do
+  yourself if needed: `filter(|b| !b" \n\t\r\x0b\x0c".contains(b))`.
 - Detect invalid trailing symbols when decoding and return an error rather than silently ignoring them.
 
 # 0.9.3
diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs
index b0702d3..61d542f 100644
--- a/benches/benchmarks.rs
+++ b/benches/benchmarks.rs
@@ -1,13 +1,11 @@
 #[macro_use]
 extern crate criterion;
 
-use base64::display;
 use base64::{
-    decode, decode_engine_slice, decode_engine_vec, encode, encode_engine_slice,
-    encode_engine_string, write,
+    display,
+    engine::{general_purpose::STANDARD, Engine},
+    write,
 };
-
-use base64::engine::DEFAULT_ENGINE;
 use criterion::{black_box, Bencher, BenchmarkId, Criterion, Throughput};
 use rand::{Rng, SeedableRng};
 use std::io::{self, Read, Write};
@@ -15,10 +13,10 @@ use std::io::{self, Read, Write};
 fn do_decode_bench(b: &mut Bencher, &size: &usize) {
     let mut v: Vec<u8> = Vec::with_capacity(size * 3 / 4);
     fill(&mut v);
-    let encoded = encode(&v);
+    let encoded = STANDARD.encode(&v);
 
     b.iter(|| {
-        let orig = decode(&encoded);
+        let orig = STANDARD.decode(&encoded);
         black_box(&orig);
     });
 }
@@ -26,11 +24,11 @@ fn do_decode_bench(b: &mut Bencher, &size: &usize) {
 fn do_decode_bench_reuse_buf(b: &mut Bencher, &size: &usize) {
     let mut v: Vec<u8> = Vec::with_capacity(size * 3 / 4);
     fill(&mut v);
-    let encoded = encode(&v);
+    let encoded = STANDARD.encode(&v);
 
     let mut buf = Vec::new();
     b.iter(|| {
-        decode_engine_vec(&encoded, &mut buf, &DEFAULT_ENGINE).unwrap();
+        STANDARD.decode_vec(&encoded, &mut buf).unwrap();
         black_box(&buf);
         buf.clear();
     });
@@ -39,12 +37,12 @@ fn do_decode_bench_reuse_buf(b: &mut Bencher, &size: &usize) {
 fn do_decode_bench_slice(b: &mut Bencher, &size: &usize) {
     let mut v: Vec<u8> = Vec::with_capacity(size * 3 / 4);
     fill(&mut v);
-    let encoded = encode(&v);
+    let encoded = STANDARD.encode(&v);
 
     let mut buf = Vec::new();
     buf.resize(size, 0);
     b.iter(|| {
-        decode_engine_slice(&encoded, &mut buf, &DEFAULT_ENGINE).unwrap();
+        STANDARD.decode_slice(&encoded, &mut buf).unwrap();
         black_box(&buf);
     });
 }
@@ -52,7 +50,7 @@ fn do_decode_bench_slice(b: &mut Bencher, &size: &usize) {
 fn do_decode_bench_stream(b: &mut Bencher, &size: &usize) {
     let mut v: Vec<u8> = Vec::with_capacity(size * 3 / 4);
     fill(&mut v);
-    let encoded = encode(&v);
+    let encoded = STANDARD.encode(&v);
 
     let mut buf = Vec::new();
     buf.resize(size, 0);
@@ -60,7 +58,7 @@ fn do_decode_bench_stream(b: &mut Bencher, &size: &usize) {
 
     b.iter(|| {
         let mut cursor = io::Cursor::new(&encoded[..]);
-        let mut decoder = base64::read::DecoderReader::from(&mut cursor, &DEFAULT_ENGINE);
+        let mut decoder = base64::read::DecoderReader::new(&mut cursor, &STANDARD);
         decoder.read_to_end(&mut buf).unwrap();
         buf.clear();
         black_box(&buf);
@@ -71,7 +69,7 @@ fn do_encode_bench(b: &mut Bencher, &size: &usize) {
     let mut v: Vec<u8> = Vec::with_capacity(size);
     fill(&mut v);
     b.iter(|| {
-        let e = encode(&v);
+        let e = STANDARD.encode(&v);
         black_box(&e);
     });
 }
@@ -80,7 +78,7 @@ fn do_encode_bench_display(b: &mut Bencher, &size: &usize) {
     let mut v: Vec<u8> = Vec::with_capacity(size);
     fill(&mut v);
     b.iter(|| {
-        let e = format!("{}", display::Base64Display::from(&v, &DEFAULT_ENGINE));
+        let e = format!("{}", display::Base64Display::new(&v, &STANDARD));
         black_box(&e);
     });
 }
@@ -90,7 +88,7 @@ fn do_encode_bench_reuse_buf(b: &mut Bencher, &size: &usize) {
     fill(&mut v);
     let mut buf = String::new();
     b.iter(|| {
-        encode_engine_string(&v, &mut buf, &DEFAULT_ENGINE);
+        STANDARD.encode_string(&v, &mut buf);
         buf.clear();
     });
 }
@@ -101,9 +99,7 @@ fn do_encode_bench_slice(b: &mut Bencher, &size: &usize) {
     let mut buf = Vec::new();
     // conservative estimate of encoded size
     buf.resize(v.len() * 2, 0);
-    b.iter(|| {
-        encode_engine_slice(&v, &mut buf, &DEFAULT_ENGINE);
-    });
+    b.iter(|| STANDARD.encode_slice(&v, &mut buf).unwrap());
 }
 
 fn do_encode_bench_stream(b: &mut Bencher, &size: &usize) {
@@ -114,7 +110,7 @@ fn do_encode_bench_stream(b: &mut Bencher, &size: &usize) {
     buf.reserve(size * 2);
     b.iter(|| {
         buf.clear();
-        let mut stream_enc = write::EncoderWriter::from(&mut buf, &DEFAULT_ENGINE);
+        let mut stream_enc = write::EncoderWriter::new(&mut buf, &STANDARD);
         stream_enc.write_all(&v).unwrap();
         stream_enc.flush().unwrap();
     });
@@ -125,7 +121,7 @@ fn do_encode_bench_string_stream(b: &mut Bencher, &size: &usize) {
     fill(&mut v);
 
     b.iter(|| {
-        let mut stream_enc = write::EncoderStringWriter::from(&DEFAULT_ENGINE);
+        let mut stream_enc = write::EncoderStringWriter::new(&STANDARD);
         stream_enc.write_all(&v).unwrap();
         stream_enc.flush().unwrap();
         let _ = stream_enc.into_inner();
@@ -139,7 +135,7 @@ fn do_encode_bench_string_reuse_buf_stream(b: &mut Bencher, &size: &usize) {
     let mut buf = String::new();
     b.iter(|| {
         buf.clear();
-        let mut stream_enc = write::EncoderStringWriter::from_consumer(&mut buf, &DEFAULT_ENGINE);
+        let mut stream_enc = write::EncoderStringWriter::from_consumer(&mut buf, &STANDARD);
         stream_enc.write_all(&v).unwrap();
         stream_enc.flush().unwrap();
         let _ = stream_enc.into_inner();
diff --git a/examples/base64.rs b/examples/base64.rs
index 0218ccf..0a214d2 100644
--- a/examples/base64.rs
+++ b/examples/base64.rs
@@ -61,21 +61,21 @@ fn main() {
     };
 
     let alphabet = opt.alphabet.unwrap_or_default();
-    let engine = engine::fast_portable::FastPortable::from(
+    let engine = engine::GeneralPurpose::new(
         &match alphabet {
             Alphabet::Standard => alphabet::STANDARD,
             Alphabet::UrlSafe => alphabet::URL_SAFE,
         },
-        engine::fast_portable::PAD,
+        engine::general_purpose::PAD,
     );
 
     let stdout = io::stdout();
     let mut stdout = stdout.lock();
     let r = if opt.decode {
-        let mut decoder = read::DecoderReader::from(&mut input, &engine);
+        let mut decoder = read::DecoderReader::new(&mut input, &engine);
         io::copy(&mut decoder, &mut stdout)
     } else {
-        let mut encoder = write::EncoderWriter::from(&mut stdout, &engine);
+        let mut encoder = write::EncoderWriter::new(&mut stdout, &engine);
         io::copy(&mut input, &mut encoder)
     };
     if let Err(e) = r {
diff --git a/fuzz/fuzzers/decode_random.rs b/fuzz/fuzzers/decode_random.rs
index 5769578..90e5c7a 100644
--- a/fuzz/fuzzers/decode_random.rs
+++ b/fuzz/fuzzers/decode_random.rs
@@ -11,5 +11,5 @@ fuzz_target!(|data: &[u8]| {
 
     // The data probably isn't valid base64 input, but as long as it returns an error instead
     // of crashing, that's correct behavior.
-    let _ = decode_engine(data, &engine);
+    let _ = engine.decode(data);
 });
diff --git a/fuzz/fuzzers/roundtrip.rs b/fuzz/fuzzers/roundtrip.rs
index 8b0a98b..128c6b8 100644
--- a/fuzz/fuzzers/roundtrip.rs
+++ b/fuzz/fuzzers/roundtrip.rs
@@ -2,10 +2,10 @@
 #[macro_use] extern crate libfuzzer_sys;
 extern crate base64;
 
-use base64::engine::DEFAULT_ENGINE;
+use base64::{Engine as _, engine::general_purpose::STANDARD};
 
 fuzz_target!(|data: &[u8]| {
-    let encoded = base64::encode_engine(data, &DEFAULT_ENGINE);
-    let decoded = base64::decode_engine(&encoded, &DEFAULT_ENGINE).unwrap();
+    let encoded = STANDARD.encode(data);
+    let decoded = STANDARD.decode(&encoded).unwrap();
     assert_eq!(data, decoded.as_slice());
 });
diff --git a/fuzz/fuzzers/roundtrip_no_pad.rs b/fuzz/fuzzers/roundtrip_no_pad.rs
index def2ebb..3e5988f 100644
--- a/fuzz/fuzzers/roundtrip_no_pad.rs
+++ b/fuzz/fuzzers/roundtrip_no_pad.rs
@@ -3,15 +3,15 @@
 extern crate libfuzzer_sys;
 extern crate base64;
 
-use base64::engine::{self, fast_portable};
+use base64::{Engine as _, engine::{self, general_purpose}};
 
 fuzz_target!(|data: &[u8]| {
-    let config = fast_portable::FastPortableConfig::new()
+    let config = general_purpose::GeneralPurposeConfig::new()
         .with_encode_padding(false)
         .with_decode_padding_mode(engine::DecodePaddingMode::RequireNone);
-    let engine = fast_portable::FastPortable::from(&base64::alphabet::STANDARD, config);
+    let engine = general_purpose::GeneralPurpose::new(&base64::alphabet::STANDARD, config);
 
-    let encoded = base64::encode_engine(data, &engine);
-    let decoded = base64::decode_engine(&encoded, &engine).unwrap();
+    let encoded = engine.encode(data);
+    let decoded = engine.decode(&encoded).unwrap();
     assert_eq!(data, decoded.as_slice());
 });
diff --git a/fuzz/fuzzers/roundtrip_random_config.rs b/fuzz/fuzzers/roundtrip_random_config.rs
index 55d7465..e59a371 100644
--- a/fuzz/fuzzers/roundtrip_random_config.rs
+++ b/fuzz/fuzzers/roundtrip_random_config.rs
@@ -9,7 +9,7 @@ mod utils;
 fuzz_target!(|data: &[u8]| {
     let engine = utils::random_engine(data);
 
-    let encoded = encode_engine(data, &engine);
-    let decoded = decode_engine(&encoded, &engine).unwrap();
+    let encoded = engine.encode(data);
+    let decoded = engine.decode(&encoded).unwrap();
     assert_eq!(data, decoded.as_slice());
 });
diff --git a/fuzz/fuzzers/utils.rs b/fuzz/fuzzers/utils.rs
index 0158032..2e9df59 100644
--- a/fuzz/fuzzers/utils.rs
+++ b/fuzz/fuzzers/utils.rs
@@ -2,12 +2,12 @@ extern crate rand;
 extern crate rand_pcg;
 extern crate sha2;
 
-use base64::{alphabet, engine::{self, fast_portable}};
+use base64::{alphabet, engine::{self, general_purpose}};
 use self::rand::{Rng, SeedableRng};
 use self::rand_pcg::Pcg32;
 use self::sha2::Digest as _;
 
-pub fn random_engine(data: &[u8]) -> fast_portable::FastPortable {
+pub fn random_engine(data: &[u8]) -> general_purpose::GeneralPurpose {
     // use sha256 of data as rng seed so it's repeatable
     let mut hasher = sha2::Sha256::new();
     hasher.update(data);
@@ -30,10 +30,10 @@ pub fn random_engine(data: &[u8]) -> fast_portable::FastPortable {
     } else {
         engine::DecodePaddingMode::RequireNone
     };
-    let config = fast_portable::FastPortableConfig::new()
+    let config = general_purpose::GeneralPurposeConfig::new()
         .with_encode_padding(encode_padding)
         .with_decode_allow_trailing_bits(rng.gen())
         .with_decode_padding_mode(decode_padding);
 
-    fast_portable::FastPortable::from(&alphabet, config)
+    general_purpose::GeneralPurpose::new(&alphabet, config)
 }
diff --git a/src/alphabet.rs b/src/alphabet.rs
index 9f177d0..7cd1b57 100644
--- a/src/alphabet.rs
+++ b/src/alphabet.rs
@@ -1,7 +1,7 @@
 //! Provides [Alphabet] and constants for alphabets commonly used in the wild.
 
 use crate::PAD_BYTE;
-use core::{convert, fmt};
+use core::fmt;
 #[cfg(any(feature = "std", test))]
 use std::error;
 
@@ -13,11 +13,11 @@ const ALPHABET_SIZE: usize = 64;
 /// can be made via `from_str` or the `TryFrom<str>` implementation.
 ///
 /// ```
-/// let custom = base64::alphabet::Alphabet::from_str("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
+/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
 ///
-/// let engine = base64::engine::fast_portable::FastPortable::from(
+/// let engine = base64::engine::GeneralPurpose::new(
 ///     &custom,
-///     base64::engine::fast_portable::PAD);
+///     base64::engine::general_purpose::PAD);
 /// ```
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct Alphabet {
@@ -44,7 +44,7 @@ impl Alphabet {
     /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
     ///
     /// The `=` byte is not allowed as it is used for padding.
-    pub const fn from_str(alphabet: &str) -> Result<Self, ParseAlphabetError> {
+    pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
         let bytes = alphabet.as_bytes();
         if bytes.len() != ALPHABET_SIZE {
             return Err(ParseAlphabetError::InvalidLength);
@@ -93,11 +93,11 @@ impl Alphabet {
     }
 }
 
-impl convert::TryFrom<&str> for Alphabet {
+impl TryFrom<&str> for Alphabet {
     type Error = ParseAlphabetError;
 
     fn try_from(value: &str) -> Result<Self, Self::Error> {
-        Self::from_str(value)
+        Self::new(value)
     }
 }
 
@@ -177,7 +177,7 @@ mod tests {
     fn detects_duplicate_start() {
         assert_eq!(
             ParseAlphabetError::DuplicatedByte(b'A'),
-            Alphabet::from_str("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
+            Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                 .unwrap_err()
         );
     }
@@ -186,7 +186,7 @@ mod tests {
     fn detects_duplicate_end() {
         assert_eq!(
             ParseAlphabetError::DuplicatedByte(b'/'),
-            Alphabet::from_str("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
+            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
                 .unwrap_err()
         );
     }
@@ -195,7 +195,7 @@ mod tests {
     fn detects_duplicate_middle() {
         assert_eq!(
             ParseAlphabetError::DuplicatedByte(b'Z'),
-            Alphabet::from_str("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
+            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
                 .unwrap_err()
         );
     }
@@ -204,7 +204,7 @@ mod tests {
     fn detects_length() {
         assert_eq!(
             ParseAlphabetError::InvalidLength,
-            Alphabet::from_str(
+            Alphabet::new(
                 "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
             )
             .unwrap_err()
@@ -215,7 +215,7 @@ mod tests {
     fn detects_padding() {
         assert_eq!(
             ParseAlphabetError::ReservedByte(b'='),
-            Alphabet::from_str("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
+            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
                 .unwrap_err()
         );
     }
@@ -225,10 +225,8 @@ mod tests {
         // form feed
         assert_eq!(
             ParseAlphabetError::UnprintableByte(0xc),
-            Alphabet::from_str(
-                "\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
-            )
-            .unwrap_err()
+            Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
+                .unwrap_err()
         );
     }
 
diff --git a/src/chunked_encoder.rs b/src/chunked_encoder.rs
index 9c23e52..0457259 100644
--- a/src/chunked_encoder.rs
+++ b/src/chunked_encoder.rs
@@ -18,13 +18,13 @@ pub trait Sink {
 const BUF_SIZE: usize = 1024;
 
 /// A base64 encoder that emits encoded bytes in chunks without heap allocation.
-pub struct ChunkedEncoder<'e, E: Engine> {
+pub struct ChunkedEncoder<'e, E: Engine + ?Sized> {
     engine: &'e E,
     max_input_chunk_len: usize,
 }
 
-impl<'e, E: Engine> ChunkedEncoder<'e, E> {
-    pub fn from(engine: &'e E) -> ChunkedEncoder<'e, E> {
+impl<'e, E: Engine + ?Sized> ChunkedEncoder<'e, E> {
+    pub fn new(engine: &'e E) -> ChunkedEncoder<'e, E> {
         ChunkedEncoder {
             engine,
             max_input_chunk_len: max_input_length(BUF_SIZE, engine.config().encode_padding()),
@@ -41,7 +41,7 @@ impl<'e, E: Engine> ChunkedEncoder<'e, E> {
 
             let chunk = &bytes[input_index..(input_index + input_chunk_len)];
 
-            let mut b64_bytes_written = self.engine.encode(chunk, &mut encode_buf);
+            let mut b64_bytes_written = self.engine.internal_encode(chunk, &mut encode_buf);
 
             input_index += input_chunk_len;
             let more_input_left = input_index < bytes.len();
@@ -88,7 +88,7 @@ pub(crate) struct StringSink<'a> {
 
 #[cfg(any(feature = "alloc", feature = "std", test))]
 impl<'a> StringSink<'a> {
-    pub(crate) fn from(s: &mut String) -> StringSink {
+    pub(crate) fn new(s: &mut String) -> StringSink {
         StringSink { string: s }
     }
 }
@@ -111,10 +111,11 @@ pub mod tests {
         Rng, SeedableRng,
     };
 
-    use crate::alphabet::STANDARD;
-    use crate::encode_engine_string;
-    use crate::engine::fast_portable::{FastPortable, FastPortableConfig, PAD};
-    use crate::tests::random_engine;
+    use crate::{
+        alphabet::STANDARD,
+        engine::general_purpose::{GeneralPurpose, GeneralPurposeConfig, PAD},
+        tests::random_engine,
+    };
 
     use super::*;
 
@@ -193,18 +194,18 @@ pub mod tests {
             let engine = random_engine(&mut rng);
 
             let chunk_encoded_string = sink_test_helper.encode_to_string(&engine, &input_buf);
-            encode_engine_string(&input_buf, &mut output_buf, &engine);
+            engine.encode_string(&input_buf, &mut output_buf);
 
             assert_eq!(output_buf, chunk_encoded_string, "input len={}", buf_len);
         }
     }
 
-    fn chunked_encode_str(bytes: &[u8], config: FastPortableConfig) -> String {
+    fn chunked_encode_str(bytes: &[u8], config: GeneralPurposeConfig) -> String {
         let mut s = String::new();
 
-        let mut sink = StringSink::from(&mut s);
-        let engine = FastPortable::from(&STANDARD, config);
-        let encoder = ChunkedEncoder::from(&engine);
+        let mut sink = StringSink::new(&mut s);
+        let engine = GeneralPurpose::new(&STANDARD, config);
+        let encoder = ChunkedEncoder::new(&engine);
         encoder.encode(bytes, &mut sink).unwrap();
 
         s
@@ -219,9 +220,9 @@ pub mod tests {
 
     impl SinkTestHelper for StringSinkTestHelper {
         fn encode_to_string<E: Engine>(&self, engine: &E, bytes: &[u8]) -> String {
-            let encoder = ChunkedEncoder::from(engine);
+            let encoder = ChunkedEncoder::new(engine);
             let mut s = String::new();
-            let mut sink = StringSink::from(&mut s);
+            let mut sink = StringSink::new(&mut s);
             encoder.encode(bytes, &mut sink).unwrap();
 
             s
diff --git a/src/decode.rs b/src/decode.rs
index f6d636b..0471518 100644
--- a/src/decode.rs
+++ b/src/decode.rs
@@ -1,8 +1,4 @@
-#[cfg(any(feature = "alloc", feature = "std", test))]
-use crate::engine::DecodeEstimate;
-use crate::engine::Engine;
-#[cfg(any(feature = "alloc", feature = "std", test))]
-use crate::engine::DEFAULT_ENGINE;
+use crate::engine::{general_purpose::STANDARD, DecodeEstimate, Engine};
 #[cfg(any(feature = "alloc", feature = "std", test))]
 use alloc::vec::Vec;
 use core::fmt;
@@ -46,153 +42,121 @@ impl fmt::Display for DecodeError {
 
 #[cfg(any(feature = "std", test))]
 impl error::Error for DecodeError {
-    fn description(&self) -> &str {
-        match *self {
-            Self::InvalidByte(_, _) => "invalid byte",
-            Self::InvalidLength => "invalid length",
-            Self::InvalidLastSymbol(_, _) => "invalid last symbol",
-            Self::InvalidPadding => "invalid padding",
+    fn cause(&self) -> Option<&dyn error::Error> {
+        None
+    }
+}
+
+/// Errors that can occur while decoding into a slice.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum DecodeSliceError {
+    /// A [DecodeError] occurred
+    DecodeError(DecodeError),
+    /// The provided slice _may_ be too small.
+    ///
+    /// The check is conservative (assumes the last triplet of output bytes will all be needed).
+    OutputSliceTooSmall,
+}
+
+impl fmt::Display for DecodeSliceError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::DecodeError(e) => write!(f, "DecodeError: {}", e),
+            Self::OutputSliceTooSmall => write!(f, "Output slice too small"),
         }
     }
+}
 
+#[cfg(any(feature = "std", test))]
+impl error::Error for DecodeSliceError {
     fn cause(&self) -> Option<&dyn error::Error> {
-        None
+        match self {
+            DecodeSliceError::DecodeError(e) => Some(e),
+            DecodeSliceError::OutputSliceTooSmall => None,
+        }
     }
 }
 
-///Decode base64 using the [default engine](DEFAULT_ENGINE).
-///Returns a `Result` containing a `Vec<u8>`.
-///
-///# Example
+impl From<DecodeError> for DecodeSliceError {
+    fn from(e: DecodeError) -> Self {
+        DecodeSliceError::DecodeError(e)
+    }
+}
+
+/// Decode base64 using the [`STANDARD` engine](STANDARD).
 ///
-///```rust
-/// let bytes = base64::decode("aGVsbG8gd29ybGQ=").unwrap();
-/// println!("{:?}", bytes);
-///```
+/// See [Engine::decode].
+#[deprecated(since = "0.21.0", note = "Use Engine::decode")]
 #[cfg(any(feature = "alloc", feature = "std", test))]
 pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, DecodeError> {
-    decode_engine(input, &DEFAULT_ENGINE)
+    STANDARD.decode(input)
 }
 
-///Decode from string reference as octets using the specified [Engine].
-///Returns a `Result` containing a `Vec<u8>`.
-///
-///# Example
-///
-///```rust
-///    let bytes = base64::decode_engine(
-///        "aGVsbG8gd29ybGR+Cg==",
-///        &base64::engine::DEFAULT_ENGINE,
-///    ).unwrap();
-///    println!("{:?}", bytes);
+/// Decode from string reference as octets using the specified [Engine].
 ///
-///    // custom engine setup
-///    let bytes_url = base64::decode_engine(
-///        "aGVsbG8gaW50ZXJuZXR-Cg",
-///        &base64::engine::fast_portable::FastPortable::from(
-///            &base64::alphabet::URL_SAFE,
-///            base64::engine::fast_portable::NO_PAD),
-///
-///    ).unwrap();
-///    println!("{:?}", bytes_url);
-///```
+/// See [Engine::decode].
+/// Returns a `Result` containing a `Vec<u8>`.
+#[deprecated(since = "0.21.0", note = "Use Engine::decode")]
 #[cfg(any(feature = "alloc", feature = "std", test))]
 pub fn decode_engine<E: Engine, T: AsRef<[u8]>>(
     input: T,
     engine: &E,
 ) -> Result<Vec<u8>, DecodeError> {
-    let decoded_length_estimate = (input
-        .as_ref()
-        .len()
-        .checked_add(3)
-        .expect("decoded length calculation overflow"))
-        / 4
-        * 3;
-    let mut buffer = Vec::<u8>::with_capacity(decoded_length_estimate);
-    decode_engine_vec(input, &mut buffer, engine).map(|_| buffer)
+    engine.decode(input)
 }
 
-///Decode from string reference as octets.
-///Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough.
-///Returns a `Result` containing an empty tuple, aka `()`.
-///
-///# Example
-///
-///```rust
-///const URL_SAFE_ENGINE: base64::engine::fast_portable::FastPortable =
-///    base64::engine::fast_portable::FastPortable::from(
-///        &base64::alphabet::URL_SAFE,
-///        base64::engine::fast_portable::PAD);
+/// Decode from string reference as octets.
 ///
-///fn main() {
-///    let mut buffer = Vec::<u8>::new();
-///    // with the default engine
-///    base64::decode_engine_vec(
-///        "aGVsbG8gd29ybGR+Cg==",
-///        &mut buffer,
-///        &base64::engine::DEFAULT_ENGINE
-///    ).unwrap();
-///    println!("{:?}", buffer);
-///
-///    buffer.clear();
-///
-///    // with a custom engine
-///    base64::decode_engine_vec(
-///        "aGVsbG8gaW50ZXJuZXR-Cg==",
-///        &mut buffer,
-///        &URL_SAFE_ENGINE
-///    ).unwrap();
-///    println!("{:?}", buffer);
-///}
-///```
+/// See [Engine::decode_vec].
 #[cfg(any(feature = "alloc", feature = "std", test))]
+#[deprecated(since = "0.21.0", note = "Use Engine::decode_vec")]
 pub fn decode_engine_vec<E: Engine, T: AsRef<[u8]>>(
     input: T,
     buffer: &mut Vec<u8>,
     engine: &E,
 ) -> Result<(), DecodeError> {
-    let input_bytes = input.as_ref();
-
-    let starting_output_len = buffer.len();
-
-    let estimate = engine.decoded_length_estimate(input_bytes.len());
-    let total_len_estimate = estimate
-        .decoded_length_estimate()
-        .checked_add(starting_output_len)
-        .expect("Overflow when calculating output buffer length");
-    buffer.resize(total_len_estimate, 0);
-
-    let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
-    let bytes_written = engine.decode(input_bytes, buffer_slice, estimate)?;
-
-    buffer.truncate(starting_output_len + bytes_written);
-
-    Ok(())
+    engine.decode_vec(input, buffer)
 }
 
 /// Decode the input into the provided output slice.
 ///
-/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
-///
-/// If you don't know ahead of time what the decoded length should be, size your buffer with a
-/// conservative estimate for the decoded length of an input: 3 bytes of output for every 4 bytes of
-/// input, rounded up, or in other words `(input_len + 3) / 4 * 3`.
-///
-/// # Panics
-///
-/// If the slice is not large enough, this will panic.
+/// See [Engine::decode_slice].
+#[deprecated(since = "0.21.0", note = "Use Engine::decode_slice")]
 pub fn decode_engine_slice<E: Engine, T: AsRef<[u8]>>(
     input: T,
     output: &mut [u8],
     engine: &E,
-) -> Result<usize, DecodeError> {
-    let input_bytes = input.as_ref();
-
-    engine.decode(
-        input_bytes,
-        output,
-        engine.decoded_length_estimate(input_bytes.len()),
-    )
+) -> Result<usize, DecodeSliceError> {
+    engine.decode_slice(input, output)
+}
+
+/// Returns a conservative estimate of the decoded size of `encoded_len` base64 symbols (rounded up
+/// to the next group of 3 decoded bytes).
+///
+/// The resulting length will be a safe choice for the size of a decode buffer, but may have up to
+/// 2 trailing bytes that won't end up being needed.
+///
+/// # Examples
+///
+/// ```
+/// use base64::decoded_len_estimate;
+///
+/// assert_eq!(3, decoded_len_estimate(1));
+/// assert_eq!(3, decoded_len_estimate(2));
+/// assert_eq!(3, decoded_len_estimate(3));
+/// assert_eq!(3, decoded_len_estimate(4));
+/// // start of the next quad of encoded symbols
+/// assert_eq!(6, decoded_len_estimate(5));
+/// ```
+///
+/// # Panics
+///
+/// Panics if decoded length estimation overflows.
+/// This would happen for sizes within a few bytes of the maximum value of `usize`.
+pub fn decoded_len_estimate(encoded_len: usize) -> usize {
+    STANDARD
+        .internal_decoded_len_estimate(encoded_len)
+        .decoded_len_estimate()
 }
 
 #[cfg(test)]
@@ -200,8 +164,7 @@ mod tests {
     use super::*;
     use crate::{
         alphabet,
-        encode::encode_engine_string,
-        engine::{fast_portable, fast_portable::FastPortable, Config},
+        engine::{general_purpose, Config, GeneralPurpose},
         tests::{assert_encode_sanity, random_engine},
     };
     use rand::{
@@ -236,7 +199,7 @@ mod tests {
             }
 
             let engine = random_engine(&mut rng);
-            encode_engine_string(&orig_data, &mut encoded_data, &engine);
+            engine.encode_string(&orig_data, &mut encoded_data);
             assert_encode_sanity(&encoded_data, engine.config().encode_padding(), input_len);
 
             let prefix_len = prefix_len_range.sample(&mut rng);
@@ -250,9 +213,13 @@ mod tests {
             decoded_with_prefix.copy_from_slice(&prefix);
 
             // decode into the non-empty buf
-            decode_engine_vec(&encoded_data, &mut decoded_with_prefix, &engine).unwrap();
+            engine
+                .decode_vec(&encoded_data, &mut decoded_with_prefix)
+                .unwrap();
             // also decode into the empty buf
-            decode_engine_vec(&encoded_data, &mut decoded_without_prefix, &engine).unwrap();
+            engine
+                .decode_vec(&encoded_data, &mut decoded_without_prefix)
+                .unwrap();
 
             assert_eq!(
                 prefix_len + decoded_without_prefix.len(),
@@ -268,7 +235,66 @@ mod tests {
     }
 
     #[test]
-    fn decode_into_slice_doesnt_clobber_existing_prefix_or_suffix() {
+    fn decode_slice_doesnt_clobber_existing_prefix_or_suffix() {
+        do_decode_slice_doesnt_clobber_existing_prefix_or_suffix(|e, input, output| {
+            e.decode_slice(input, output).unwrap()
+        })
+    }
+
+    #[test]
+    fn decode_slice_unchecked_doesnt_clobber_existing_prefix_or_suffix() {
+        do_decode_slice_doesnt_clobber_existing_prefix_or_suffix(|e, input, output| {
+            e.decode_slice_unchecked(input, output).unwrap()
+        })
+    }
+
+    #[test]
+    fn decode_engine_estimation_works_for_various_lengths() {
+        let engine = GeneralPurpose::new(&alphabet::STANDARD, general_purpose::NO_PAD);
+        for num_prefix_quads in 0..100 {
+            for suffix in &["AA", "AAA", "AAAA"] {
+                let mut prefix = "AAAA".repeat(num_prefix_quads);
+                prefix.push_str(suffix);
+                // make sure no overflow (and thus a panic) occurs
+                let res = engine.decode(prefix);
+                assert!(res.is_ok());
+            }
+        }
+    }
+
+    #[test]
+    fn decode_slice_output_length_errors() {
+        for num_quads in 1..100 {
+            let input = "AAAA".repeat(num_quads);
+            let mut vec = vec![0; (num_quads - 1) * 3];
+            assert_eq!(
+                DecodeSliceError::OutputSliceTooSmall,
+                STANDARD.decode_slice(&input, &mut vec).unwrap_err()
+            );
+            vec.push(0);
+            assert_eq!(
+                DecodeSliceError::OutputSliceTooSmall,
+                STANDARD.decode_slice(&input, &mut vec).unwrap_err()
+            );
+            vec.push(0);
+            assert_eq!(
+                DecodeSliceError::OutputSliceTooSmall,
+                STANDARD.decode_slice(&input, &mut vec).unwrap_err()
+            );
+            vec.push(0);
+            // now it works
+            assert_eq!(
+                num_quads * 3,
+                STANDARD.decode_slice(&input, &mut vec).unwrap()
+            );
+        }
+    }
+
+    fn do_decode_slice_doesnt_clobber_existing_prefix_or_suffix<
+        F: Fn(&GeneralPurpose, &[u8], &mut [u8]) -> usize,
+    >(
+        call_decode: F,
+    ) {
         let mut orig_data = Vec::new();
         let mut encoded_data = String::new();
         let mut decode_buf = Vec::new();
@@ -291,7 +317,7 @@ mod tests {
             }
 
             let engine = random_engine(&mut rng);
-            encode_engine_string(&orig_data, &mut encoded_data, &engine);
+            engine.encode_string(&orig_data, &mut encoded_data);
             assert_encode_sanity(&encoded_data, engine.config().encode_padding(), input_len);
 
             // fill the buffer with random garbage, long enough to have some room before and after
@@ -306,7 +332,7 @@ mod tests {
 
             // decode into the non-empty buf
             let decode_bytes_written =
-                decode_engine_slice(&encoded_data, &mut decode_buf[offset..], &engine).unwrap();
+                call_decode(&engine, encoded_data.as_bytes(), &mut decode_buf[offset..]);
 
             assert_eq!(orig_data.len(), decode_bytes_written);
             assert_eq!(
@@ -320,53 +346,4 @@ mod tests {
             );
         }
     }
-
-    #[test]
-    fn decode_into_slice_fits_in_precisely_sized_slice() {
-        let mut orig_data = Vec::new();
-        let mut encoded_data = String::new();
-        let mut decode_buf = Vec::new();
-
-        let input_len_range = Uniform::new(0, 1000);
-        let mut rng = rand::rngs::SmallRng::from_entropy();
-
-        for _ in 0..10_000 {
-            orig_data.clear();
-            encoded_data.clear();
-            decode_buf.clear();
-
-            let input_len = input_len_range.sample(&mut rng);
-
-            for _ in 0..input_len {
-                orig_data.push(rng.gen());
-            }
-
-            let engine = random_engine(&mut rng);
-            encode_engine_string(&orig_data, &mut encoded_data, &engine);
-            assert_encode_sanity(&encoded_data, engine.config().encode_padding(), input_len);
-
-            decode_buf.resize(input_len, 0);
-
-            // decode into the non-empty buf
-            let decode_bytes_written =
-                decode_engine_slice(&encoded_data, &mut decode_buf[..], &engine).unwrap();
-
-            assert_eq!(orig_data.len(), decode_bytes_written);
-            assert_eq!(orig_data, decode_buf);
-        }
-    }
-
-    #[test]
-    fn decode_engine_estimation_works_for_various_lengths() {
-        let engine = FastPortable::from(&alphabet::STANDARD, fast_portable::NO_PAD);
-        for num_prefix_quads in 0..100 {
-            for suffix in &["AA", "AAA", "AAAA"] {
-                let mut prefix = "AAAA".repeat(num_prefix_quads);
-                prefix.push_str(suffix);
-                // make sure no overflow (and thus a panic) occurs
-                let res = decode_engine(prefix, &engine);
-                assert!(res.is_ok());
-            }
-        }
-    }
 }
diff --git a/src/display.rs b/src/display.rs
index e2fa9e2..fc292f1 100644
--- a/src/display.rs
+++ b/src/display.rs
@@ -1,11 +1,10 @@
 //! Enables base64'd output anywhere you might use a `Display` implementation, like a format string.
 //!
 //! ```
-//! use base64::display::Base64Display;
-//! use base64::engine::DEFAULT_ENGINE;
+//! use base64::{display::Base64Display, engine::general_purpose::STANDARD};
 //!
 //! let data = vec![0x0, 0x1, 0x2, 0x3];
-//! let wrapper = Base64Display::from(&data, &DEFAULT_ENGINE);
+//! let wrapper = Base64Display::new(&data, &STANDARD);
 //!
 //! assert_eq!("base64: AAECAw==", format!("base64: {}", wrapper));
 //! ```
@@ -23,10 +22,10 @@ pub struct Base64Display<'a, 'e, E: Engine> {
 
 impl<'a, 'e, E: Engine> Base64Display<'a, 'e, E> {
     /// Create a `Base64Display` with the provided engine.
-    pub fn from(bytes: &'a [u8], engine: &'e E) -> Base64Display<'a, 'e, E> {
+    pub fn new(bytes: &'a [u8], engine: &'e E) -> Base64Display<'a, 'e, E> {
         Base64Display {
             bytes,
-            chunked_encoder: ChunkedEncoder::from(engine),
+            chunked_encoder: ChunkedEncoder::new(engine),
         }
     }
 }
@@ -59,17 +58,17 @@ mod tests {
         chunked_encode_matches_normal_encode_random, SinkTestHelper,
     };
     use super::*;
-    use crate::engine::DEFAULT_ENGINE;
+    use crate::engine::general_purpose::STANDARD;
 
     #[test]
     fn basic_display() {
         assert_eq!(
             "~$Zm9vYmFy#*",
-            format!("~${}#*", Base64Display::from(b"foobar", &DEFAULT_ENGINE))
+            format!("~${}#*", Base64Display::new(b"foobar", &STANDARD))
         );
         assert_eq!(
             "~$Zm9vYmFyZg==#*",
-            format!("~${}#*", Base64Display::from(b"foobarf", &DEFAULT_ENGINE))
+            format!("~${}#*", Base64Display::new(b"foobarf", &STANDARD))
         );
     }
 
@@ -83,7 +82,7 @@ mod tests {
 
     impl SinkTestHelper for DisplaySinkTestHelper {
         fn encode_to_string<E: Engine>(&self, engine: &E, bytes: &[u8]) -> String {
-            format!("{}", Base64Display::from(bytes, engine))
+            format!("{}", Base64Display::new(bytes, engine))
         }
     }
 }
diff --git a/src/encode.rs b/src/encode.rs
index 575beb6..cb17650 100644
--- a/src/encode.rs
+++ b/src/encode.rs
@@ -1,147 +1,59 @@
 #[cfg(any(feature = "alloc", feature = "std", test))]
-use crate::chunked_encoder;
+use alloc::string::String;
+use core::fmt;
+#[cfg(any(feature = "std", test))]
+use std::error;
+
 #[cfg(any(feature = "alloc", feature = "std", test))]
-use crate::engine::DEFAULT_ENGINE;
+use crate::engine::general_purpose::STANDARD;
 use crate::engine::{Config, Engine};
 use crate::PAD_BYTE;
-#[cfg(any(feature = "alloc", feature = "std", test))]
-use alloc::{string::String, vec};
 
-///Encode arbitrary octets as base64 using the [default engine](DEFAULT_ENGINE).
-///Returns a `String`.
-///
-///# Example
+/// Encode arbitrary octets as base64 using the [`STANDARD` engine](STANDARD).
 ///
-///```rust
-/// let b64 = base64::encode(b"hello world");
-/// println!("{}", b64);
-///```
+/// See [Engine::encode].
+#[allow(unused)]
+#[deprecated(since = "0.21.0", note = "Use Engine::encode")]
 #[cfg(any(feature = "alloc", feature = "std", test))]
 pub fn encode<T: AsRef<[u8]>>(input: T) -> String {
-    encode_engine(input, &DEFAULT_ENGINE)
+    STANDARD.encode(input)
 }
 
-///Encode arbitrary octets as base64 using the provided `Engine`.
-///Returns a `String`.
-///
-///# Example
-///
-///```rust
-///const URL_SAFE_ENGINE: base64::engine::fast_portable::FastPortable =
-///    base64::engine::fast_portable::FastPortable::from(
-///        &base64::alphabet::URL_SAFE,
-///        base64::engine::fast_portable::NO_PAD);
+/// Encode arbitrary octets as base64 using the provided `Engine` into a new `String`.
 ///
-///    let b64 = base64::encode_engine(
-///        b"hello world~",
-///        &base64::engine::DEFAULT_ENGINE
-///        );
-///    println!("{}", b64);
-///
-///    let b64_url = base64::encode_engine(
-///        b"hello internet~",
-///        &URL_SAFE_ENGINE
-///        );
-///    println!("{}", b64_url);
-///```
+/// See [Engine::encode].
+#[allow(unused)]
+#[deprecated(since = "0.21.0", note = "Use Engine::encode")]
 #[cfg(any(feature = "alloc", feature = "std", test))]
 pub fn encode_engine<E: Engine, T: AsRef<[u8]>>(input: T, engine: &E) -> String {
-    let encoded_size = encoded_len(input.as_ref().len(), engine.config().encode_padding())
-        .expect("integer overflow when calculating buffer size");
-    let mut buf = vec![0; encoded_size];
-
-    encode_with_padding(input.as_ref(), &mut buf[..], engine, encoded_size);
-
-    String::from_utf8(buf).expect("Invalid UTF8")
+    engine.encode(input)
 }
 
-///Encode arbitrary octets as base64.
-///Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough.
-///
-///# Example
-///
-///```rust
-///const URL_SAFE_ENGINE: base64::engine::fast_portable::FastPortable =
-///    base64::engine::fast_portable::FastPortable::from(
-///        &base64::alphabet::URL_SAFE,
-///        base64::engine::fast_portable::NO_PAD);
-///fn main() {
-///    let mut buf = String::new();
-///    base64::encode_engine_string(
-///        b"hello world~",
-///        &mut buf,
-///        &base64::engine::DEFAULT_ENGINE);
-///    println!("{}", buf);
+/// Encode arbitrary octets as base64 into a supplied `String`.
 ///
-///    buf.clear();
-///    base64::encode_engine_string(
-///        b"hello internet~",
-///        &mut buf,
-///        &URL_SAFE_ENGINE);
-///    println!("{}", buf);
-///}
-///```
+/// See [Engine::encode_string].
+#[allow(unused)]
+#[deprecated(since = "0.21.0", note = "Use Engine::encode_string")]
 #[cfg(any(feature = "alloc", feature = "std", test))]
 pub fn encode_engine_string<E: Engine, T: AsRef<[u8]>>(
     input: T,
     output_buf: &mut String,
     engine: &E,
 ) {
-    let input_bytes = input.as_ref();
-
-    {
-        let mut sink = chunked_encoder::StringSink::from(output_buf);
-        let encoder = chunked_encoder::ChunkedEncoder::from(engine);
-
-        encoder
-            .encode(input_bytes, &mut sink)
-            .expect("Writing to a String shouldn't fail");
-    }
+    engine.encode_string(input, output_buf)
 }
 
-/// Encode arbitrary octets as base64.
-/// Writes into the supplied output buffer.
-///
-/// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident
-/// or statically-allocated buffer).
-///
-/// # Panics
-///
-/// If `output` is too small to hold the encoded version of `input`, a panic will result.
-///
-/// # Example
-///
-/// ```rust
-/// let s = b"hello internet!";
-/// let mut buf = Vec::new();
-/// // make sure we'll have a slice big enough for base64 + padding
-/// buf.resize(s.len() * 4 / 3 + 4, 0);
-///
-/// let bytes_written = base64::encode_engine_slice(
-///     s,
-///     &mut buf,
-///     &base64::engine::DEFAULT_ENGINE);
+/// Encode arbitrary octets as base64 into a supplied slice.
 ///
-/// // shorten our vec down to just what was written
-/// buf.truncate(bytes_written);
-///
-/// assert_eq!(s, base64::decode(&buf).unwrap().as_slice());
-/// ```
+/// See [Engine::encode_slice].
+#[allow(unused)]
+#[deprecated(since = "0.21.0", note = "Use Engine::encode_slice")]
 pub fn encode_engine_slice<E: Engine, T: AsRef<[u8]>>(
     input: T,
     output_buf: &mut [u8],
     engine: &E,
-) -> usize {
-    let input_bytes = input.as_ref();
-
-    let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding())
-        .expect("usize overflow when calculating buffer size");
-
-    let b64_output = &mut output_buf[0..encoded_size];
-
-    encode_with_padding(input_bytes, b64_output, engine, encoded_size);
-
-    encoded_size
+) -> Result<usize, EncodeSliceError> {
+    engine.encode_slice(input, output_buf)
 }
 
 /// B64-encode and pad (if configured).
@@ -154,7 +66,7 @@ pub fn encode_engine_slice<E: Engine, T: AsRef<[u8]>>(
 /// `output` must be of size `encoded_size`.
 ///
 /// All bytes in `output` will be written to since it is exactly the size of the output.
-fn encode_with_padding<E: Engine>(
+pub(crate) fn encode_with_padding<E: Engine + ?Sized>(
     input: &[u8],
     output: &mut [u8],
     engine: &E,
@@ -162,7 +74,7 @@ fn encode_with_padding<E: Engine>(
 ) {
     debug_assert_eq!(expected_encoded_size, output.len());
 
-    let b64_bytes_written = engine.encode(input, output);
+    let b64_bytes_written = engine.internal_encode(input, output);
 
     let padding_bytes = if engine.config().encode_padding() {
         add_padding(input.len(), &mut output[b64_bytes_written..])
@@ -180,7 +92,8 @@ fn encode_with_padding<E: Engine>(
 /// Calculate the base64 encoded length for a given input length, optionally including any
 /// appropriate padding bytes.
 ///
-/// Returns `None` if the encoded length can't be represented in `usize`.
+/// Returns `None` if the encoded length can't be represented in `usize`. This will happen for
+/// input lengths in approximately the top quarter of the range of `usize`.
 pub fn encoded_len(bytes_len: usize, padding: bool) -> Option<usize> {
     let rem = bytes_len % 3;
 
@@ -208,7 +121,7 @@ pub fn encoded_len(bytes_len: usize, padding: bool) -> Option<usize> {
 /// `output` is the slice where padding should be written, of length at least 2.
 ///
 /// Returns the number of padding bytes written.
-pub fn add_padding(input_len: usize, output: &mut [u8]) -> usize {
+pub(crate) fn add_padding(input_len: usize, output: &mut [u8]) -> usize {
     // TODO base on encoded len to use cheaper mod by 4 (aka & 7)
     let rem = input_len % 3;
     let mut bytes_written = 0;
@@ -220,48 +133,68 @@ pub fn add_padding(input_len: usize, output: &mut [u8]) -> usize {
     bytes_written
 }
 
+/// Errors that can occur while encoding into a slice.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum EncodeSliceError {
+    /// The provided slice is too small.
+    OutputSliceTooSmall,
+}
+
+impl fmt::Display for EncodeSliceError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::OutputSliceTooSmall => write!(f, "Output slice too small"),
+        }
+    }
+}
+
+#[cfg(any(feature = "std", test))]
+impl error::Error for EncodeSliceError {
+    fn cause(&self) -> Option<&dyn error::Error> {
+        None
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
+
     use crate::{
-        decode::decode_engine_vec,
-        tests::{assert_encode_sanity, random_config},
+        alphabet,
+        engine::general_purpose::{GeneralPurpose, NO_PAD, STANDARD},
+        tests::{assert_encode_sanity, random_config, random_engine},
     };
-
-    use crate::alphabet::{IMAP_MUTF7, STANDARD, URL_SAFE};
-    use crate::engine::fast_portable::{FastPortable, NO_PAD};
-    use crate::tests::random_engine;
     use rand::{
         distributions::{Distribution, Uniform},
         Rng, SeedableRng,
     };
     use std::str;
 
-    const URL_SAFE_NO_PAD_ENGINE: FastPortable = FastPortable::from(&URL_SAFE, NO_PAD);
+    const URL_SAFE_NO_PAD_ENGINE: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, NO_PAD);
 
     #[test]
     fn encoded_size_correct_standard() {
-        assert_encoded_length(0, 0, &DEFAULT_ENGINE, true);
+        assert_encoded_length(0, 0, &STANDARD, true);
 
-        assert_encoded_length(1, 4, &DEFAULT_ENGINE, true);
-        assert_encoded_length(2, 4, &DEFAULT_ENGINE, true);
-        assert_encoded_length(3, 4, &DEFAULT_ENGINE, true);
+        assert_encoded_length(1, 4, &STANDARD, true);
+        assert_encoded_length(2, 4, &STANDARD, true);
+        assert_encoded_length(3, 4, &STANDARD, true);
 
-        assert_encoded_length(4, 8, &DEFAULT_ENGINE, true);
-        assert_encoded_length(5, 8, &DEFAULT_ENGINE, true);
-        assert_encoded_length(6, 8, &DEFAULT_ENGINE, true);
+        assert_encoded_length(4, 8, &STANDARD, true);
+        assert_encoded_length(5, 8, &STANDARD, true);
+        assert_encoded_length(6, 8, &STANDARD, true);
 
-        assert_encoded_length(7, 12, &DEFAULT_ENGINE, true);
-        assert_encoded_length(8, 12, &DEFAULT_ENGINE, true);
-        assert_encoded_length(9, 12, &DEFAULT_ENGINE, true);
+        assert_encoded_length(7, 12, &STANDARD, true);
+        assert_encoded_length(8, 12, &STANDARD, true);
+        assert_encoded_length(9, 12, &STANDARD, true);
 
-        assert_encoded_length(54, 72, &DEFAULT_ENGINE, true);
+        assert_encoded_length(54, 72, &STANDARD, true);
 
-        assert_encoded_length(55, 76, &DEFAULT_ENGINE, true);
-        assert_encoded_length(56, 76, &DEFAULT_ENGINE, true);
-        assert_encoded_length(57, 76, &DEFAULT_ENGINE, true);
+        assert_encoded_length(55, 76, &STANDARD, true);
+        assert_encoded_length(56, 76, &STANDARD, true);
+        assert_encoded_length(57, 76, &STANDARD, true);
 
-        assert_encoded_length(58, 80, &DEFAULT_ENGINE, true);
+        assert_encoded_length(58, 80, &STANDARD, true);
     }
 
     #[test]
@@ -291,7 +224,7 @@ mod tests {
 
     #[test]
     fn encoded_size_overflow() {
-        assert_eq!(None, encoded_len(std::usize::MAX, true));
+        assert_eq!(None, encoded_len(usize::MAX, true));
     }
 
     #[test]
@@ -329,8 +262,8 @@ mod tests {
             encoded_data_with_prefix.push_str(&prefix);
 
             let engine = random_engine(&mut rng);
-            encode_engine_string(&orig_data, &mut encoded_data_no_prefix, &engine);
-            encode_engine_string(&orig_data, &mut encoded_data_with_prefix, &engine);
+            engine.encode_string(&orig_data, &mut encoded_data_no_prefix);
+            engine.encode_string(&orig_data, &mut encoded_data_with_prefix);
 
             assert_eq!(
                 encoded_data_no_prefix.len() + prefix_len,
@@ -352,7 +285,9 @@ mod tests {
 
             assert_eq!(prefix, encoded_data_with_prefix);
 
-            decode_engine_vec(&encoded_data_no_prefix, &mut decoded, &engine).unwrap();
+            engine
+                .decode_vec(&encoded_data_no_prefix, &mut decoded)
+                .unwrap();
             assert_eq!(orig_data, decoded);
         }
     }
@@ -393,11 +328,11 @@ mod tests {
 
             assert_eq!(
                 encoded_size,
-                encode_engine_slice(&orig_data, &mut encoded_data, &engine)
+                engine.encode_slice(&orig_data, &mut encoded_data).unwrap()
             );
 
             assert_encode_sanity(
-                std::str::from_utf8(&encoded_data[0..encoded_size]).unwrap(),
+                str::from_utf8(&encoded_data[0..encoded_size]).unwrap(),
                 engine.config().encode_padding(),
                 input_len,
             );
@@ -407,50 +342,9 @@ mod tests {
                 &encoded_data_original_state[encoded_size..]
             );
 
-            decode_engine_vec(&encoded_data[0..encoded_size], &mut decoded, &engine).unwrap();
-            assert_eq!(orig_data, decoded);
-        }
-    }
-
-    #[test]
-    fn encode_engine_slice_fits_into_precisely_sized_slice() {
-        let mut orig_data = Vec::new();
-        let mut encoded_data = Vec::new();
-        let mut decoded = Vec::new();
-
-        let input_len_range = Uniform::new(0, 1000);
-
-        let mut rng = rand::rngs::SmallRng::from_entropy();
-
-        for _ in 0..10_000 {
-            orig_data.clear();
-            encoded_data.clear();
-            decoded.clear();
-
-            let input_len = input_len_range.sample(&mut rng);
-
-            for _ in 0..input_len {
-                orig_data.push(rng.gen());
-            }
-
-            let engine = random_engine(&mut rng);
-
-            let encoded_size = encoded_len(input_len, engine.config().encode_padding()).unwrap();
-
-            encoded_data.resize(encoded_size, 0);
-
-            assert_eq!(
-                encoded_size,
-                encode_engine_slice(&orig_data, &mut encoded_data, &engine)
-            );
-
-            assert_encode_sanity(
-                std::str::from_utf8(&encoded_data[0..encoded_size]).unwrap(),
-                engine.config().encode_padding(),
-                input_len,
-            );
-
-            decode_engine_vec(&encoded_data[0..encoded_size], &mut decoded, &engine).unwrap();
+            engine
+                .decode_vec(&encoded_data[0..encoded_size], &mut decoded)
+                .unwrap();
             assert_eq!(orig_data, decoded);
         }
     }
@@ -485,7 +379,7 @@ mod tests {
 
             let orig_output_buf = output.clone();
 
-            let bytes_written = engine.encode(&input, &mut output);
+            let bytes_written = engine.internal_encode(&input, &mut output);
 
             // make sure the part beyond bytes_written is the same garbage it was before
             assert_eq!(orig_output_buf[bytes_written..], output[bytes_written..]);
@@ -576,7 +470,7 @@ mod tests {
             bytes.push(rng.gen());
         }
 
-        let encoded = encode_engine(&bytes, engine);
+        let encoded = engine.encode(&bytes);
         assert_encode_sanity(&encoded, padded, input_len);
 
         assert_eq!(enc_len, encoded.len());
@@ -585,8 +479,10 @@ mod tests {
     #[test]
     fn encode_imap() {
         assert_eq!(
-            encode_engine(b"\xFB\xFF", &FastPortable::from(&IMAP_MUTF7, NO_PAD)),
-            encode_engine(b"\xFB\xFF", &FastPortable::from(&STANDARD, NO_PAD)).replace('/', ",")
+            &GeneralPurpose::new(&alphabet::IMAP_MUTF7, NO_PAD).encode(b"\xFB\xFF"),
+            &GeneralPurpose::new(&alphabet::STANDARD, NO_PAD)
+                .encode(b"\xFB\xFF")
+                .replace('/', ",")
         );
     }
 }
diff --git a/src/engine/fast_portable/decode.rs b/src/engine/general_purpose/decode.rs
similarity index 91%
rename from src/engine/fast_portable/decode.rs
rename to src/engine/general_purpose/decode.rs
index 5b67043..e9fd788 100644
--- a/src/engine/fast_portable/decode.rs
+++ b/src/engine/general_purpose/decode.rs
@@ -1,5 +1,5 @@
 use crate::{
-    engine::{fast_portable::INVALID_VALUE, DecodeEstimate, DecodePaddingMode},
+    engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodePaddingMode},
     DecodeError, PAD_BYTE,
 };
 
@@ -22,24 +22,31 @@ const DECODED_BLOCK_LEN: usize =
     CHUNKS_PER_FAST_LOOP_BLOCK * DECODED_CHUNK_LEN + DECODED_CHUNK_SUFFIX;
 
 #[doc(hidden)]
-pub struct FastPortableEstimate {
+pub struct GeneralPurposeEstimate {
     /// Total number of decode chunks, including a possibly partial last chunk
     num_chunks: usize,
+    decoded_len_estimate: usize,
 }
 
-impl FastPortableEstimate {
-    pub(crate) fn from(input_len: usize) -> Self {
+impl GeneralPurposeEstimate {
+    pub(crate) fn new(encoded_len: usize) -> Self {
         Self {
-            num_chunks: num_chunks(input_len),
+            num_chunks: encoded_len
+                .checked_add(INPUT_CHUNK_LEN - 1)
+                .expect("Overflow when calculating number of chunks in input")
+                / INPUT_CHUNK_LEN,
+            decoded_len_estimate: encoded_len
+                .checked_add(3)
+                .expect("Overflow when calculating decoded len estimate")
+                / 4
+                * 3,
         }
     }
 }
 
-impl DecodeEstimate for FastPortableEstimate {
-    fn decoded_length_estimate(&self) -> usize {
-        self.num_chunks
-            .checked_mul(DECODED_CHUNK_LEN)
-            .expect("Overflow when calculating decoded length")
+impl DecodeEstimate for GeneralPurposeEstimate {
+    fn decoded_len_estimate(&self) -> usize {
+        self.decoded_len_estimate
     }
 }
 
@@ -51,7 +58,7 @@ impl DecodeEstimate for FastPortableEstimate {
 #[inline]
 pub(crate) fn decode_helper(
     input: &[u8],
-    estimate: FastPortableEstimate,
+    estimate: GeneralPurposeEstimate,
     output: &mut [u8],
     decode_table: &[u8; 256],
     decode_allow_trailing_bits: bool,
@@ -287,14 +294,6 @@ fn decode_chunk(
     Ok(())
 }
 
-/// Return the number of input chunks (including a possibly partial final chunk) in the input
-pub(crate) fn num_chunks(input_len: usize) -> usize {
-    input_len
-        .checked_add(INPUT_CHUNK_LEN - 1)
-        .expect("Overflow when calculating number of chunks in input")
-        / INPUT_CHUNK_LEN
-}
-
 /// Decode an 8-byte chunk, but only write the 6 bytes actually decoded instead of including 2
 /// trailing garbage bytes.
 #[inline]
@@ -327,14 +326,14 @@ fn write_u64(output: &mut [u8], value: u64) {
 mod tests {
     use super::*;
 
-    use crate::engine::DEFAULT_ENGINE;
+    use crate::engine::general_purpose::STANDARD;
 
     #[test]
     fn decode_chunk_precise_writes_only_6_bytes() {
         let input = b"Zm9vYmFy"; // "foobar"
         let mut output = [0_u8, 1, 2, 3, 4, 5, 6, 7];
 
-        decode_chunk_precise(&input[..], 0, &DEFAULT_ENGINE.decode_table, &mut output).unwrap();
+        decode_chunk_precise(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap();
         assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 6, 7], &output);
     }
 
@@ -343,7 +342,7 @@ mod tests {
         let input = b"Zm9vYmFy"; // "foobar"
         let mut output = [0_u8, 1, 2, 3, 4, 5, 6, 7];
 
-        decode_chunk(&input[..], 0, &DEFAULT_ENGINE.decode_table, &mut output).unwrap();
+        decode_chunk(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap();
         assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 0, 0], &output);
     }
 }
diff --git a/src/engine/fast_portable/decode_suffix.rs b/src/engine/general_purpose/decode_suffix.rs
similarity index 99%
rename from src/engine/fast_portable/decode_suffix.rs
rename to src/engine/general_purpose/decode_suffix.rs
index 8f896b2..5652035 100644
--- a/src/engine/fast_portable/decode_suffix.rs
+++ b/src/engine/general_purpose/decode_suffix.rs
@@ -1,5 +1,5 @@
 use crate::{
-    engine::{fast_portable::INVALID_VALUE, DecodePaddingMode},
+    engine::{general_purpose::INVALID_VALUE, DecodePaddingMode},
     DecodeError, PAD_BYTE,
 };
 
diff --git a/src/engine/fast_portable/mod.rs b/src/engine/general_purpose/mod.rs
similarity index 87%
rename from src/engine/fast_portable/mod.rs
rename to src/engine/general_purpose/mod.rs
index 9eef9b1..af8897b 100644
--- a/src/engine/fast_portable/mod.rs
+++ b/src/engine/general_purpose/mod.rs
@@ -1,5 +1,6 @@
-//! Provides the [FastPortable] engine and associated config types.
+//! Provides the [GeneralPurpose] engine and associated config types.
 use crate::{
+    alphabet,
     alphabet::Alphabet,
     engine::{Config, DecodePaddingMode},
     DecodeError,
@@ -8,27 +9,27 @@ use core::convert::TryInto;
 
 mod decode;
 pub(crate) mod decode_suffix;
-pub use decode::FastPortableEstimate;
+pub use decode::GeneralPurposeEstimate;
 
 pub(crate) const INVALID_VALUE: u8 = 255;
 
 /// A general-purpose base64 engine.
 ///
 /// - It uses no vector CPU instructions, so it will work on any system.
-/// - It is reasonably fast (~2GiB/s).
+/// - It is reasonably fast (~2-3GiB/s).
 /// - It is not constant-time, though, so it is vulnerable to timing side-channel attacks. For loading cryptographic keys, etc, it is suggested to use the forthcoming constant-time implementation.
-pub struct FastPortable {
+pub struct GeneralPurpose {
     encode_table: [u8; 64],
     decode_table: [u8; 256],
-    config: FastPortableConfig,
+    config: GeneralPurposeConfig,
 }
 
-impl FastPortable {
-    /// Create a `FastPortable` engine from an [Alphabet].
+impl GeneralPurpose {
+    /// Create a `GeneralPurpose` engine from an [Alphabet].
     ///
     /// While not very expensive to initialize, ideally these should be cached
     /// if the engine will be used repeatedly.
-    pub const fn from(alphabet: &Alphabet, config: FastPortableConfig) -> Self {
+    pub const fn new(alphabet: &Alphabet, config: GeneralPurposeConfig) -> Self {
         Self {
             encode_table: encode_table(alphabet),
             decode_table: decode_table(alphabet),
@@ -37,11 +38,11 @@ impl FastPortable {
     }
 }
 
-impl super::Engine for FastPortable {
-    type Config = FastPortableConfig;
-    type DecodeEstimate = FastPortableEstimate;
+impl super::Engine for GeneralPurpose {
+    type Config = GeneralPurposeConfig;
+    type DecodeEstimate = GeneralPurposeEstimate;
 
-    fn encode(&self, input: &[u8], output: &mut [u8]) -> usize {
+    fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize {
         let mut input_index: usize = 0;
 
         const BLOCKS_PER_FAST_LOOP: usize = 4;
@@ -160,11 +161,11 @@ impl super::Engine for FastPortable {
         output_index
     }
 
-    fn decoded_length_estimate(&self, input_len: usize) -> Self::DecodeEstimate {
-        FastPortableEstimate::from(input_len)
+    fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate {
+        GeneralPurposeEstimate::new(input_len)
     }
 
-    fn decode(
+    fn internal_decode(
         &self,
         input: &[u8],
         output: &mut [u8],
@@ -228,23 +229,23 @@ fn read_u64(s: &[u8]) -> u64 {
 /// Contains configuration parameters for base64 encoding and decoding.
 ///
 /// ```
-/// # use base64::engine::fast_portable::FastPortableConfig;
-/// let config = FastPortableConfig::new()
+/// # use base64::engine::GeneralPurposeConfig;
+/// let config = GeneralPurposeConfig::new()
 ///     .with_encode_padding(false);
 ///     // further customize using `.with_*` methods as needed
 /// ```
 ///
 /// The constants [PAD] and [NO_PAD] cover most use cases.
 ///
-/// To specify the characters used, see [crate::alphabet::Alphabet].
+/// To specify the characters used, see [Alphabet].
 #[derive(Clone, Copy, Debug)]
-pub struct FastPortableConfig {
+pub struct GeneralPurposeConfig {
     encode_padding: bool,
     decode_allow_trailing_bits: bool,
     decode_padding_mode: DecodePaddingMode,
 }
 
-impl FastPortableConfig {
+impl GeneralPurposeConfig {
     /// Create a new config with `padding` = `true`, `decode_allow_trailing_bits` = `false`, and
     /// `decode_padding_mode = DecodePaddingMode::RequireCanonicalPadding`.
     ///
@@ -311,26 +312,38 @@ impl FastPortableConfig {
     }
 }
 
-impl Default for FastPortableConfig {
-    /// Delegates to [FastPortableConfig::new].
+impl Default for GeneralPurposeConfig {
+    /// Delegates to [GeneralPurposeConfig::new].
     fn default() -> Self {
         Self::new()
     }
 }
 
-impl Config for FastPortableConfig {
+impl Config for GeneralPurposeConfig {
     fn encode_padding(&self) -> bool {
         self.encode_padding
     }
 }
 
+/// A [GeneralPurpose] engine using the [alphabet::STANDARD] base64 alphabet and [PAD] config.
+pub const STANDARD: GeneralPurpose = GeneralPurpose::new(&alphabet::STANDARD, PAD);
+
+/// A [GeneralPurpose] engine using the [alphabet::STANDARD] base64 alphabet and [NO_PAD] config.
+pub const STANDARD_NO_PAD: GeneralPurpose = GeneralPurpose::new(&alphabet::STANDARD, NO_PAD);
+
+/// A [GeneralPurpose] engine using the [alphabet::URL_SAFE] base64 alphabet and [PAD] config.
+pub const URL_SAFE: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, PAD);
+
+/// A [GeneralPurpose] engine using the [alphabet::URL_SAFE] base64 alphabet and [NO_PAD] config.
+pub const URL_SAFE_NO_PAD: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, NO_PAD);
+
 /// Include padding bytes when encoding, and require that they be present when decoding.
 ///
 /// This is the standard per the base64 RFC, but consider using [NO_PAD] instead as padding serves
 /// little purpose in practice.
-pub const PAD: FastPortableConfig = FastPortableConfig::new();
+pub const PAD: GeneralPurposeConfig = GeneralPurposeConfig::new();
 
 /// Don't add padding when encoding, and require no padding when decoding.
-pub const NO_PAD: FastPortableConfig = FastPortableConfig::new()
+pub const NO_PAD: GeneralPurposeConfig = GeneralPurposeConfig::new()
     .with_encode_padding(false)
     .with_decode_padding_mode(DecodePaddingMode::RequireNone);
diff --git a/src/engine/mod.rs b/src/engine/mod.rs
index ad26bb3..12dfaa8 100644
--- a/src/engine/mod.rs
+++ b/src/engine/mod.rs
@@ -1,8 +1,17 @@
 //! Provides the [Engine] abstraction and out of the box implementations.
-use crate::engine::fast_portable::FastPortable;
-use crate::{alphabet, DecodeError};
+#[cfg(any(feature = "alloc", feature = "std", test))]
+use crate::chunked_encoder;
+use crate::{
+    encode::{encode_with_padding, EncodeSliceError},
+    encoded_len, DecodeError, DecodeSliceError,
+};
+#[cfg(any(feature = "alloc", feature = "std", test))]
+use alloc::vec::Vec;
 
-pub mod fast_portable;
+#[cfg(any(feature = "alloc", feature = "std", test))]
+use alloc::{string::String, vec};
+
+pub mod general_purpose;
 
 #[cfg(test)]
 mod naive;
@@ -10,15 +19,19 @@ mod naive;
 #[cfg(test)]
 mod tests;
 
+pub use general_purpose::{GeneralPurpose, GeneralPurposeConfig};
+
 /// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this.
 ///
 /// Different implementations offer different characteristics. The library currently ships with
-/// a general-purpose [FastPortable] impl that offers good speed and works on any CPU, with more choices
+/// [GeneralPurpose] that offers good speed and works on any CPU, with more choices
 /// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed.
 ///
-/// See [DEFAULT_ENGINE] if you just want standard base64. Otherwise, when possible, it's
+/// See [general_purpose::STANDARD] if you just want standard base64. Otherwise, when possible, it's
 /// recommended to store the engine in a `const` so that references to it won't pose any lifetime
 /// issues, and to avoid repeating the cost of engine setup.
+///
+/// Since almost nobody will need to implement `Engine`, docs for internal methods are hidden.
 // When adding an implementation of Engine, include them in the engine test suite:
 // - add an implementation of [engine::tests::EngineWrapper]
 // - add the implementation to the `all_engines` macro
@@ -29,6 +42,9 @@ pub trait Engine: Send + Sync {
     /// The decode estimate used by this engine
     type DecodeEstimate: DecodeEstimate;
 
+    /// This is not meant to be called directly; it is only for `Engine` implementors.
+    /// See the other `encode*` functions on this trait.
+    ///
     /// Encode the `input` bytes into the `output` buffer based on the mapping in `encode_table`.
     ///
     /// `output` will be long enough to hold the encoded data.
@@ -38,16 +54,27 @@ pub trait Engine: Send + Sync {
     /// No padding should be written; that is handled separately.
     ///
     /// Must not write any bytes into the output slice other than the encoded data.
-    fn encode(&self, input: &[u8], output: &mut [u8]) -> usize;
+    #[doc(hidden)]
+    fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize;
 
+    /// This is not meant to be called directly; it is only for `Engine` implementors.
+    ///
     /// As an optimization to prevent the decoded length from being calculated twice, it is
     /// sometimes helpful to have a conservative estimate of the decoded size before doing the
     /// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed.
-    fn decoded_length_estimate(&self, input_len: usize) -> Self::DecodeEstimate;
+    ///
+    /// # Panics
+    ///
+    /// Panics if decoded length estimation overflows.
+    #[doc(hidden)]
+    fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate;
 
+    /// This is not meant to be called directly; it is only for `Engine` implementors.
+    /// See the other `decode*` functions on this trait.
+    ///
     /// Decode `input` base64 bytes into the `output` buffer.
     ///
-    /// `decode_estimate` is the result of [Engine::decoded_length_estimate()], which is passed in to avoid
+    /// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid
     /// calculating it again (expensive on short inputs).`
     ///
     /// Returns the number of bytes written to `output`.
@@ -62,7 +89,12 @@ pub trait Engine: Send + Sync {
     ///
     /// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as
     /// errors unless the engine is configured otherwise.
-    fn decode(
+    ///
+    /// # Panics
+    ///
+    /// Panics if `output` is too small.
+    #[doc(hidden)]
+    fn internal_decode(
         &self,
         input: &[u8],
         output: &mut [u8],
@@ -71,6 +103,264 @@ pub trait Engine: Send + Sync {
 
     /// Returns the config for this engine.
     fn config(&self) -> &Self::Config;
+
+    /// Encode arbitrary octets as base64 using this engine.
+    /// Returns a `String`.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
+    ///
+    /// let b64 = general_purpose::STANDARD.encode(b"hello world~");
+    /// println!("{}", b64);
+    ///
+    /// const CUSTOM_ENGINE: engine::GeneralPurpose =
+    ///     engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
+    /// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~");
+    /// ```
+    #[cfg(any(feature = "alloc", feature = "std", test))]
+    fn encode<T: AsRef<[u8]>>(&self, input: T) -> String {
+        let encoded_size = encoded_len(input.as_ref().len(), self.config().encode_padding())
+            .expect("integer overflow when calculating buffer size");
+        let mut buf = vec![0; encoded_size];
+
+        encode_with_padding(input.as_ref(), &mut buf[..], self, encoded_size);
+
+        String::from_utf8(buf).expect("Invalid UTF8")
+    }
+
+    /// Encode arbitrary octets as base64 into a supplied `String`.
+    /// Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
+    /// const CUSTOM_ENGINE: engine::GeneralPurpose =
+    ///     engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
+    ///
+    /// fn main() {
+    ///     let mut buf = String::new();
+    ///     general_purpose::STANDARD.encode_string(b"hello world~", &mut buf);
+    ///     println!("{}", buf);
+    ///
+    ///     buf.clear();
+    ///     CUSTOM_ENGINE.encode_string(b"hello internet~", &mut buf);
+    ///     println!("{}", buf);
+    /// }
+    /// ```
+    #[cfg(any(feature = "alloc", feature = "std", test))]
+    fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) {
+        let input_bytes = input.as_ref();
+
+        {
+            let mut sink = chunked_encoder::StringSink::new(output_buf);
+
+            chunked_encoder::ChunkedEncoder::new(self)
+                .encode(input_bytes, &mut sink)
+                .expect("Writing to a String shouldn't fail");
+        }
+    }
+
+    /// Encode arbitrary octets as base64 into a supplied slice.
+    /// Writes into the supplied output buffer.
+    ///
+    /// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident
+    /// or statically-allocated buffer).
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use base64::{Engine as _, engine::general_purpose};
+    /// let s = b"hello internet!";
+    /// let mut buf = Vec::new();
+    /// // make sure we'll have a slice big enough for base64 + padding
+    /// buf.resize(s.len() * 4 / 3 + 4, 0);
+    ///
+    /// let bytes_written = general_purpose::STANDARD.encode_slice(s, &mut buf).unwrap();
+    ///
+    /// // shorten our vec down to just what was written
+    /// buf.truncate(bytes_written);
+    ///
+    /// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice());
+    /// ```
+    fn encode_slice<T: AsRef<[u8]>>(
+        &self,
+        input: T,
+        output_buf: &mut [u8],
+    ) -> Result<usize, EncodeSliceError> {
+        let input_bytes = input.as_ref();
+
+        let encoded_size = encoded_len(input_bytes.len(), self.config().encode_padding())
+            .expect("usize overflow when calculating buffer size");
+
+        if output_buf.len() < encoded_size {
+            return Err(EncodeSliceError::OutputSliceTooSmall);
+        }
+
+        let b64_output = &mut output_buf[0..encoded_size];
+
+        encode_with_padding(input_bytes, b64_output, self, encoded_size);
+
+        Ok(encoded_size)
+    }
+
+    /// Decode base64 from any `AsRef<[u8]>` input (e.g. `&str` or `&[u8]`) using this [Engine].
+    /// Returns a `Result` containing a `Vec<u8>`.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
+    ///
+    /// let bytes = general_purpose::STANDARD
+    ///     .decode("aGVsbG8gd29ybGR+Cg==").unwrap();
+    /// println!("{:?}", bytes);
+    ///
+    /// // custom engine setup
+    /// let bytes_url = engine::GeneralPurpose::new(
+    ///              &alphabet::URL_SAFE,
+    ///              general_purpose::NO_PAD)
+    ///     .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap();
+    /// println!("{:?}", bytes_url);
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if decoded length estimation overflows.
+    /// This would happen for sizes within a few bytes of the maximum value of `usize`.
+    #[cfg(any(feature = "alloc", feature = "std", test))]
+    fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> {
+        let input_bytes = input.as_ref();
+
+        let estimate = self.internal_decoded_len_estimate(input_bytes.len());
+        let mut buffer = vec![0; estimate.decoded_len_estimate()];
+
+        let bytes_written = self.internal_decode(input_bytes, &mut buffer, estimate)?;
+        buffer.truncate(bytes_written);
+
+        Ok(buffer)
+    }
+
+    /// Decode base64 from any `AsRef<[u8]>` input (e.g. `&str` or `&[u8]`).
+    /// Appends to the supplied `Vec`, which may allocate if its internal buffer isn't big enough.
+    /// Returns a `Result` containing an empty tuple, aka `()`.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
+    /// const CUSTOM_ENGINE: engine::GeneralPurpose =
+    ///     engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::PAD);
+    ///
+    /// fn main() {
+    ///     use base64::Engine;
+    ///     let mut buffer = Vec::<u8>::new();
+    ///     // with the standard engine
+    ///     general_purpose::STANDARD
+    ///         .decode_vec("aGVsbG8gd29ybGR+Cg==", &mut buffer).unwrap();
+    ///     println!("{:?}", buffer);
+    ///
+    ///     buffer.clear();
+    ///
+    ///     // with a custom engine
+    ///     CUSTOM_ENGINE.decode_vec(
+    ///         "aGVsbG8gaW50ZXJuZXR-Cg==",
+    ///         &mut buffer,
+    ///     ).unwrap();
+    ///     println!("{:?}", buffer);
+    /// }
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if decoded length estimation overflows.
+    /// This would happen for sizes within a few bytes of the maximum value of `usize`.
+    #[cfg(any(feature = "alloc", feature = "std", test))]
+    fn decode_vec<T: AsRef<[u8]>>(
+        &self,
+        input: T,
+        buffer: &mut Vec<u8>,
+    ) -> Result<(), DecodeError> {
+        let input_bytes = input.as_ref();
+
+        let starting_output_len = buffer.len();
+
+        let estimate = self.internal_decoded_len_estimate(input_bytes.len());
+        let total_len_estimate = estimate
+            .decoded_len_estimate()
+            .checked_add(starting_output_len)
+            .expect("Overflow when calculating output buffer length");
+        buffer.resize(total_len_estimate, 0);
+
+        let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
+        let bytes_written = self.internal_decode(input_bytes, buffer_slice, estimate)?;
+
+        buffer.truncate(starting_output_len + bytes_written);
+
+        Ok(())
+    }
+
+    /// Decode the input into the provided output slice.
+    ///
+    /// Returns an error if `output` is smaller than the estimated decoded length.
+    ///
+    /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
+    ///
+    /// See [crate::decoded_len_estimate] for calculating buffer sizes.
+    ///
+    /// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error
+    /// if the output buffer is too small.
+    ///
+    /// # Panics
+    ///
+    /// Panics if decoded length estimation overflows.
+    /// This would happen for sizes within a few bytes of the maximum value of `usize`.
+    fn decode_slice<T: AsRef<[u8]>>(
+        &self,
+        input: T,
+        output: &mut [u8],
+    ) -> Result<usize, DecodeSliceError> {
+        let input_bytes = input.as_ref();
+
+        let estimate = self.internal_decoded_len_estimate(input_bytes.len());
+        if output.len() < estimate.decoded_len_estimate() {
+            return Err(DecodeSliceError::OutputSliceTooSmall);
+        }
+
+        self.internal_decode(input_bytes, output, estimate)
+            .map_err(|e| e.into())
+    }
+
+    /// Decode the input into the provided output slice.
+    ///
+    /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
+    ///
+    /// See [crate::decoded_len_estimate] for calculating buffer sizes.
+    ///
+    /// See [Engine::decode_slice] for a version that returns an error instead of panicking if the output
+    /// buffer is too small.
+    ///
+    /// # Panics
+    ///
+    /// Panics if decoded length estimation overflows.
+    /// This would happen for sizes within a few bytes of the maximum value of `usize`.
+    ///
+    /// Panics if the provided output buffer is too small for the decoded data.
+    fn decode_slice_unchecked<T: AsRef<[u8]>>(
+        &self,
+        input: T,
+        output: &mut [u8],
+    ) -> Result<usize, DecodeError> {
+        let input_bytes = input.as_ref();
+
+        self.internal_decode(
+            input_bytes,
+            output,
+            self.internal_decoded_len_estimate(input_bytes.len()),
+        )
+    }
 }
 
 /// The minimal level of configuration that engines must support.
@@ -94,13 +384,17 @@ pub trait Config {
 pub trait DecodeEstimate {
     /// Returns a conservative (err on the side of too big) estimate of the decoded length to use
     /// for pre-allocating buffers, etc.
-    fn decoded_length_estimate(&self) -> usize;
+    ///
+    /// The estimate must be no larger than the next largest complete triple of decoded bytes.
+    /// That is, the final quad of tokens to decode may be assumed to be complete with no padding.
+    ///
+    /// # Panics
+    ///
+    /// Panics if decoded length estimation overflows.
+    /// This would happen for sizes within a few bytes of the maximum value of `usize`.
+    fn decoded_len_estimate(&self) -> usize;
 }
 
-/// A [FastPortable] engine using the [crate::alphabet::STANDARD] base64 alphabet and [crate::engine::fast_portable::PAD] config.
-pub const DEFAULT_ENGINE: FastPortable =
-    FastPortable::from(&alphabet::STANDARD, fast_portable::PAD);
-
 /// Controls how pad bytes are handled when decoding.
 ///
 /// Each [Engine] must support at least the behavior indicated by
diff --git a/src/engine/naive.rs b/src/engine/naive.rs
index 138b821..6665c5e 100644
--- a/src/engine/naive.rs
+++ b/src/engine/naive.rs
@@ -1,7 +1,7 @@
 use crate::{
     alphabet::Alphabet,
     engine::{
-        fast_portable::{self, decode_table, encode_table},
+        general_purpose::{self, decode_table, encode_table},
         Config, DecodeEstimate, DecodePaddingMode, Engine,
     },
     DecodeError, PAD_BYTE,
@@ -20,7 +20,7 @@ impl Naive {
     const ENCODE_INPUT_CHUNK_SIZE: usize = 3;
     const DECODE_INPUT_CHUNK_SIZE: usize = 4;
 
-    pub const fn from(alphabet: &Alphabet, config: NaiveConfig) -> Self {
+    pub const fn new(alphabet: &Alphabet, config: NaiveConfig) -> Self {
         Self {
             encode_table: encode_table(alphabet),
             decode_table: decode_table(alphabet),
@@ -31,7 +31,7 @@ impl Naive {
     fn decode_byte_into_u32(&self, offset: usize, byte: u8) -> Result<u32, DecodeError> {
         let decoded = self.decode_table[byte as usize];
 
-        if decoded == fast_portable::INVALID_VALUE {
+        if decoded == general_purpose::INVALID_VALUE {
             return Err(DecodeError::InvalidByte(offset, byte));
         }
 
@@ -43,7 +43,7 @@ impl Engine for Naive {
     type Config = NaiveConfig;
     type DecodeEstimate = NaiveEstimate;
 
-    fn encode(&self, input: &[u8], output: &mut [u8]) -> usize {
+    fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize {
         // complete chunks first
 
         const LOW_SIX_BITS: u32 = 0x3F;
@@ -103,11 +103,11 @@ impl Engine for Naive {
         output_index
     }
 
-    fn decoded_length_estimate(&self, input_len: usize) -> Self::DecodeEstimate {
-        NaiveEstimate::from(input_len)
+    fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate {
+        NaiveEstimate::new(input_len)
     }
 
-    fn decode(
+    fn internal_decode(
         &self,
         input: &[u8],
         output: &mut [u8],
@@ -117,7 +117,8 @@ impl Engine for Naive {
             // trailing whitespace is so common that it's worth it to check the last byte to
             // possibly return a better error message
             if let Some(b) = input.last() {
-                if *b != PAD_BYTE && self.decode_table[*b as usize] == fast_portable::INVALID_VALUE
+                if *b != PAD_BYTE
+                    && self.decode_table[*b as usize] == general_purpose::INVALID_VALUE
                 {
                     return Err(DecodeError::InvalidByte(input.len() - 1, *b));
                 }
@@ -163,7 +164,7 @@ impl Engine for Naive {
             }
         }
 
-        fast_portable::decode_suffix::decode_suffix(
+        general_purpose::decode_suffix::decode_suffix(
             input,
             input_index,
             output,
@@ -182,12 +183,12 @@ impl Engine for Naive {
 pub struct NaiveEstimate {
     /// remainder from dividing input by `Naive::DECODE_CHUNK_SIZE`
     rem: usize,
-    /// Number of complete `Naive::DECODE_CHUNK_SIZE`-length chunks
+    /// Length of input that is in complete `Naive::DECODE_INPUT_CHUNK_SIZE`-length chunks
     complete_chunk_len: usize,
 }
 
 impl NaiveEstimate {
-    fn from(input_len: usize) -> Self {
+    fn new(input_len: usize) -> Self {
         let rem = input_len % Naive::DECODE_INPUT_CHUNK_SIZE;
         let complete_chunk_len = input_len - rem;
 
@@ -199,8 +200,8 @@ impl NaiveEstimate {
 }
 
 impl DecodeEstimate for NaiveEstimate {
-    fn decoded_length_estimate(&self) -> usize {
-        (self.complete_chunk_len + 1) * 3
+    fn decoded_len_estimate(&self) -> usize {
+        ((self.complete_chunk_len / 4) + ((self.rem > 0) as usize)) * 3
     }
 }
 
diff --git a/src/engine/tests.rs b/src/engine/tests.rs
index 2c91e81..906bba0 100644
--- a/src/engine/tests.rs
+++ b/src/engine/tests.rs
@@ -2,7 +2,9 @@
 #![allow(unused_variables)]
 
 use rand::{
-    self, distributions, distributions::Distribution as _, rngs, Rng as _, SeedableRng as _,
+    self,
+    distributions::{self, Distribution as _},
+    rngs, Rng as _, SeedableRng as _,
 };
 use rstest::rstest;
 use rstest_reuse::{apply, template};
@@ -10,8 +12,9 @@ use std::{collections, fmt};
 
 use crate::{
     alphabet::{Alphabet, STANDARD},
-    decode_engine, encode, encode_engine_slice,
-    engine::{fast_portable, naive, Config, DecodePaddingMode, Engine},
+    encode::add_padding,
+    encoded_len,
+    engine::{general_purpose, naive, Config, DecodeEstimate, DecodePaddingMode, Engine},
     tests::{assert_encode_sanity, random_alphabet, random_config},
     DecodeError, PAD_BYTE,
 };
@@ -19,7 +22,7 @@ use crate::{
 // the case::foo syntax includes the "foo" in the generated test method names
 #[template]
 #[rstest(engine_wrapper,
-case::fast_portable(FastPortableWrapper {}),
+case::general_purpose(GeneralPurposeWrapper {}),
 case::naive(NaiveWrapper {}),
 )]
 fn all_engines<E: EngineWrapper>(engine_wrapper: E) {}
@@ -47,13 +50,14 @@ fn rfc_test_vectors_std_alphabet<E: EngineWrapper>(engine_wrapper: E) {
             let mut encode_buf = [0_u8; 8];
             let mut decode_buf = [0_u8; 6];
 
-            let encode_len = engine_no_padding.encode(orig.as_bytes(), &mut encode_buf[..]);
+            let encode_len =
+                engine_no_padding.internal_encode(orig.as_bytes(), &mut encode_buf[..]);
             assert_eq!(
                 &encoded_without_padding,
                 &std::str::from_utf8(&encode_buf[0..encode_len]).unwrap()
             );
             let decode_len = engine_no_padding
-                .decode_ez(encoded_without_padding.as_bytes(), &mut decode_buf[..])
+                .decode_slice_unchecked(encoded_without_padding.as_bytes(), &mut decode_buf[..])
                 .unwrap();
             assert_eq!(orig.len(), decode_len);
 
@@ -66,7 +70,7 @@ fn rfc_test_vectors_std_alphabet<E: EngineWrapper>(engine_wrapper: E) {
             if encoded.as_bytes().contains(&PAD_BYTE) {
                 assert_eq!(
                     Err(DecodeError::InvalidPadding),
-                    engine_no_padding.decode_ez_str_vec(encoded)
+                    engine_no_padding.decode(encoded)
                 )
             }
         }
@@ -76,17 +80,17 @@ fn rfc_test_vectors_std_alphabet<E: EngineWrapper>(engine_wrapper: E) {
             let mut encode_buf = [0_u8; 8];
             let mut decode_buf = [0_u8; 6];
 
-            let encode_len = engine.encode(orig.as_bytes(), &mut encode_buf[..]);
+            let encode_len = engine.internal_encode(orig.as_bytes(), &mut encode_buf[..]);
             assert_eq!(
                 // doesn't have padding added yet
                 &encoded_without_padding,
                 &std::str::from_utf8(&encode_buf[0..encode_len]).unwrap()
             );
-            let pad_len = encode::add_padding(orig.len(), &mut encode_buf[encode_len..]);
+            let pad_len = add_padding(orig.len(), &mut encode_buf[encode_len..]);
             assert_eq!(encoded.as_bytes(), &encode_buf[..encode_len + pad_len]);
 
             let decode_len = engine
-                .decode_ez(encoded.as_bytes(), &mut decode_buf[..])
+                .decode_slice_unchecked(encoded.as_bytes(), &mut decode_buf[..])
                 .unwrap();
             assert_eq!(orig.len(), decode_len);
 
@@ -99,7 +103,7 @@ fn rfc_test_vectors_std_alphabet<E: EngineWrapper>(engine_wrapper: E) {
             if encoded.as_bytes().contains(&PAD_BYTE) {
                 assert_eq!(
                     Err(DecodeError::InvalidPadding),
-                    engine.decode_ez_str_vec(encoded_without_padding)
+                    engine.decode(encoded_without_padding)
                 )
             }
         }
@@ -135,7 +139,7 @@ fn roundtrip_random<E: EngineWrapper>(engine_wrapper: E) {
         decode_buf.resize(orig_len, 0);
 
         let dec_len = engine
-            .decode_ez(&encode_buf[0..encoded_len], &mut decode_buf[..])
+            .decode_slice_unchecked(&encode_buf[0..encoded_len], &mut decode_buf[..])
             .unwrap();
 
         assert_eq!(orig_len, dec_len);
@@ -168,9 +172,10 @@ fn encode_doesnt_write_extra_bytes<E: EngineWrapper>(engine_wrapper: E) {
         fill_rand_len(&mut encode_buf, &mut rng, prefix_len * 2 + orig_len * 2);
         encode_buf_backup.extend_from_slice(&encode_buf[..]);
 
-        let expected_encode_len_no_pad = encode::encoded_len(orig_len, false).unwrap();
+        let expected_encode_len_no_pad = encoded_len(orig_len, false).unwrap();
 
-        let encoded_len_no_pad = engine.encode(&orig_data[..], &mut encode_buf[prefix_len..]);
+        let encoded_len_no_pad =
+            engine.internal_encode(&orig_data[..], &mut encode_buf[prefix_len..]);
         assert_eq!(expected_encode_len_no_pad, encoded_len_no_pad);
 
         // no writes past what it claimed to write
@@ -190,19 +195,62 @@ fn encode_doesnt_write_extra_bytes<E: EngineWrapper>(engine_wrapper: E) {
 
         // pad so we can decode it in case our random engine requires padding
         let pad_len = if padded {
-            encode::add_padding(orig_len, &mut encode_buf[prefix_len + encoded_len_no_pad..])
+            add_padding(orig_len, &mut encode_buf[prefix_len + encoded_len_no_pad..])
         } else {
             0
         };
 
         assert_eq!(
             orig_data,
-            decode_engine(
-                &encode_buf[prefix_len..(prefix_len + encoded_len_no_pad + pad_len)],
-                &engine,
-            )
-            .unwrap()
+            engine
+                .decode(&encode_buf[prefix_len..(prefix_len + encoded_len_no_pad + pad_len)],)
+                .unwrap()
+        );
+    }
+}
+
+#[apply(all_engines)]
+fn encode_engine_slice_fits_into_precisely_sized_slice<E: EngineWrapper>(engine_wrapper: E) {
+    let mut orig_data = Vec::new();
+    let mut encoded_data = Vec::new();
+    let mut decoded = Vec::new();
+
+    let input_len_range = distributions::Uniform::new(0, 1000);
+
+    let mut rng = rngs::SmallRng::from_entropy();
+
+    for _ in 0..10_000 {
+        orig_data.clear();
+        encoded_data.clear();
+        decoded.clear();
+
+        let input_len = input_len_range.sample(&mut rng);
+
+        for _ in 0..input_len {
+            orig_data.push(rng.gen());
+        }
+
+        let engine = E::random(&mut rng);
+
+        let encoded_size = encoded_len(input_len, engine.config().encode_padding()).unwrap();
+
+        encoded_data.resize(encoded_size, 0);
+
+        assert_eq!(
+            encoded_size,
+            engine.encode_slice(&orig_data, &mut encoded_data).unwrap()
+        );
+
+        assert_encode_sanity(
+            std::str::from_utf8(&encoded_data[0..encoded_size]).unwrap(),
+            engine.config().encode_padding(),
+            input_len,
         );
+
+        engine
+            .decode_vec(&encoded_data[0..encoded_size], &mut decoded)
+            .unwrap();
+        assert_eq!(orig_data, decoded);
     }
 }
 
@@ -232,7 +280,9 @@ where
         let orig_len = fill_rand(&mut orig_data, &mut rng, &len_range);
         encode_buf.resize(orig_len * 2 + 100, 0);
 
-        let encoded_len = encode_engine_slice(&orig_data[..], &mut encode_buf[..], &engine);
+        let encoded_len = engine
+            .encode_slice(&orig_data[..], &mut encode_buf[..])
+            .unwrap();
         encode_buf.truncate(encoded_len);
 
         // oversize decode buffer so we can easily tell if it writes anything more than
@@ -243,7 +293,7 @@ where
         decode_buf_backup.extend_from_slice(&decode_buf[..]);
 
         let dec_len = engine
-            .decode_ez(&encode_buf, &mut decode_buf[prefix_len..])
+            .decode_slice_unchecked(&encode_buf, &mut decode_buf[prefix_len..])
             .unwrap();
 
         assert_eq!(orig_len, dec_len);
@@ -264,8 +314,8 @@ fn decode_detect_invalid_last_symbol<E: EngineWrapper>(engine_wrapper: E) {
     // 0xFF -> "/w==", so all letters > w, 0-9, and '+', '/' should get InvalidLastSymbol
     let engine = E::standard();
 
-    assert_eq!(Ok(vec![0x89, 0x85]), engine.decode_ez_str_vec("iYU="));
-    assert_eq!(Ok(vec![0xFF]), engine.decode_ez_str_vec("/w=="));
+    assert_eq!(Ok(vec![0x89, 0x85]), engine.decode("iYU="));
+    assert_eq!(Ok(vec![0xFF]), engine.decode("/w=="));
 
     for (suffix, offset) in vec![
         // suffix, offset of bad byte from start of suffix
@@ -291,7 +341,7 @@ fn decode_detect_invalid_last_symbol<E: EngineWrapper>(engine_wrapper: E) {
                     encoded.len() - 4 + offset,
                     suffix.as_bytes()[offset],
                 )),
-                engine.decode_ez_str_vec(encoded.as_str())
+                engine.decode(encoded.as_str())
             );
         }
     }
@@ -311,15 +361,12 @@ fn decode_detect_invalid_last_symbol_when_length_is_also_invalid<E: EngineWrappe
         let mut input = vec![b'A'; len];
 
         // with a valid last char, it's InvalidLength
-        assert_eq!(
-            Err(DecodeError::InvalidLength),
-            decode_engine(&input, &engine)
-        );
+        assert_eq!(Err(DecodeError::InvalidLength), engine.decode(&input));
         // after mangling the last char, it's InvalidByte
         input[len - 1] = b'"';
         assert_eq!(
             Err(DecodeError::InvalidByte(len - 1, b'"')),
-            decode_engine(&input, &engine)
+            engine.decode(&input)
         );
     }
 }
@@ -334,8 +381,8 @@ fn decode_detect_invalid_last_symbol_every_possible_two_symbols<E: EngineWrapper
 
     for b in 0_u8..=255 {
         let mut b64 = vec![0_u8; 4];
-        assert_eq!(2, engine.encode(&[b], &mut b64[..]));
-        let _ = encode::add_padding(1, &mut b64[2..]);
+        assert_eq!(2, engine.internal_encode(&[b], &mut b64[..]));
+        let _ = add_padding(1, &mut b64[2..]);
 
         assert!(base64_to_bytes.insert(b64, vec![b]).is_none());
     }
@@ -362,7 +409,7 @@ fn decode_detect_invalid_last_symbol_every_possible_two_symbols<E: EngineWrapper
                 match base64_to_bytes.get(&symbols[..]) {
                     Some(bytes) => {
                         let res = engine
-                            .decode_ez_vec(&clone)
+                            .decode(&clone)
                             // remove prefix
                             .map(|decoded| decoded[decoded_prefix_len..].to_vec());
 
@@ -370,7 +417,7 @@ fn decode_detect_invalid_last_symbol_every_possible_two_symbols<E: EngineWrapper
                     }
                     None => assert_eq!(
                         Err(DecodeError::InvalidLastSymbol(1, s2)),
-                        engine.decode_ez_vec(&symbols[..])
+                        engine.decode(&symbols[..])
                     ),
                 }
             }
@@ -394,8 +441,8 @@ fn decode_detect_invalid_last_symbol_every_possible_three_symbols<E: EngineWrapp
         for b2 in 0_u8..=255 {
             bytes[1] = b2;
             let mut b64 = vec![0_u8; 4];
-            assert_eq!(3, engine.encode(&bytes, &mut b64[..]));
-            let _ = encode::add_padding(2, &mut b64[3..]);
+            assert_eq!(3, engine.internal_encode(&bytes, &mut b64[..]));
+            let _ = add_padding(2, &mut b64[3..]);
 
             let mut v = Vec::with_capacity(2);
             v.extend_from_slice(&bytes[..]);
@@ -427,7 +474,7 @@ fn decode_detect_invalid_last_symbol_every_possible_three_symbols<E: EngineWrapp
                     match base64_to_bytes.get(&symbols[..]) {
                         Some(bytes) => {
                             let res = engine
-                                .decode_ez_vec(&input)
+                                .decode(&input)
                                 // remove prefix
                                 .map(|decoded| decoded[decoded_prefix_len..].to_vec());
 
@@ -435,7 +482,7 @@ fn decode_detect_invalid_last_symbol_every_possible_three_symbols<E: EngineWrapp
                         }
                         None => assert_eq!(
                             Err(DecodeError::InvalidLastSymbol(2, s3)),
-                            engine.decode_ez_vec(&symbols[..])
+                            engine.decode(&symbols[..])
                         ),
                     }
                 }
@@ -458,7 +505,7 @@ fn decode_invalid_trailing_bits_ignored_when_configured<E: EngineWrapper>(engine
         data: &str,
     ) {
         let prefixed = prefixed_data(input, b64_prefix_len, data);
-        let decoded = engine.decode_ez_str_vec(prefixed);
+        let decoded = engine.decode(prefixed);
         // prefix is always complete chunks
         let decoded_prefix_len = b64_prefix_len / 4 * 3;
         assert_eq!(
@@ -473,10 +520,10 @@ fn decode_invalid_trailing_bits_ignored_when_configured<E: EngineWrapper>(engine
 
         // example from https://github.com/marshallpierce/rust-base64/issues/75
         assert!(strict
-            .decode_ez_str_vec(prefixed_data(&mut input, prefix.len(), "/w=="))
+            .decode(prefixed_data(&mut input, prefix.len(), "/w=="))
             .is_ok());
         assert!(strict
-            .decode_ez_str_vec(prefixed_data(&mut input, prefix.len(), "iYU="))
+            .decode(prefixed_data(&mut input, prefix.len(), "iYU="))
             .is_ok());
         // trailing 01
         assert_tolerant_decode(&forgiving, &mut input, prefix.len(), vec![255], "/x==");
@@ -539,7 +586,7 @@ fn decode_invalid_byte_error<E: EngineWrapper>(engine_wrapper: E) {
 
         assert_eq!(
             Err(DecodeError::InvalidByte(invalid_index, invalid_byte)),
-            engine.decode_ez(
+            engine.decode_slice_unchecked(
                 &encode_buf[0..encoded_len_with_padding],
                 &mut decode_buf[..],
             )
@@ -587,7 +634,7 @@ fn decode_padding_before_final_non_padding_char_error_invalid_byte<E: EngineWrap
 
                 assert_eq!(
                     Err(DecodeError::InvalidByte(padding_start, PAD_BYTE)),
-                    engine.decode_ez_vec(&encoded),
+                    engine.decode(&encoded),
                 );
             }
         }
@@ -625,14 +672,11 @@ fn decode_padding_starts_before_final_chunk_error_invalid_byte<E: EngineWrapper>
             encoded[padding_start..].fill(PAD_BYTE);
 
             if suffix_len == 1 {
-                assert_eq!(
-                    Err(DecodeError::InvalidLength),
-                    engine.decode_ez_vec(&encoded),
-                );
+                assert_eq!(Err(DecodeError::InvalidLength), engine.decode(&encoded),);
             } else {
                 assert_eq!(
                     Err(DecodeError::InvalidByte(padding_start, PAD_BYTE)),
-                    engine.decode_ez_vec(&encoded),
+                    engine.decode(&encoded),
                 );
             }
         }
@@ -666,17 +710,14 @@ fn decode_too_little_data_before_padding_error_invalid_byte<E: EngineWrapper>(en
                 encoded.resize(encoded.len() + padding_len, PAD_BYTE);
 
                 if suffix_data_len + padding_len == 1 {
-                    assert_eq!(
-                        Err(DecodeError::InvalidLength),
-                        engine.decode_ez_vec(&encoded),
-                    );
+                    assert_eq!(Err(DecodeError::InvalidLength), engine.decode(&encoded),);
                 } else {
                     assert_eq!(
                         Err(DecodeError::InvalidByte(
                             prefix_quad_len * 4 + suffix_data_len,
                             PAD_BYTE,
                         )),
-                        engine.decode_ez_vec(&encoded),
+                        engine.decode(&encoded),
                         "suffix data len {} pad len {}",
                         suffix_data_len,
                         padding_len
@@ -692,7 +733,7 @@ fn decode_too_little_data_before_padding_error_invalid_byte<E: EngineWrapper>(en
 fn decode_malleability_test_case_3_byte_suffix_valid<E: EngineWrapper>(engine_wrapper: E) {
     assert_eq!(
         b"Hello".as_slice(),
-        &E::standard().decode_ez_str_vec("SGVsbG8=").unwrap()
+        &E::standard().decode("SGVsbG8=").unwrap()
     );
 }
 
@@ -703,7 +744,7 @@ fn decode_malleability_test_case_3_byte_suffix_invalid_trailing_symbol<E: Engine
 ) {
     assert_eq!(
         DecodeError::InvalidLastSymbol(6, 0x39),
-        E::standard().decode_ez_str_vec("SGVsbG9=").unwrap_err()
+        E::standard().decode("SGVsbG9=").unwrap_err()
     );
 }
 
@@ -712,7 +753,7 @@ fn decode_malleability_test_case_3_byte_suffix_invalid_trailing_symbol<E: Engine
 fn decode_malleability_test_case_3_byte_suffix_no_padding<E: EngineWrapper>(engine_wrapper: E) {
     assert_eq!(
         DecodeError::InvalidPadding,
-        E::standard().decode_ez_str_vec("SGVsbG9").unwrap_err()
+        E::standard().decode("SGVsbG9").unwrap_err()
     );
 }
 
@@ -723,7 +764,7 @@ fn decode_malleability_test_case_2_byte_suffix_valid_two_padding_symbols<E: Engi
 ) {
     assert_eq!(
         b"Hell".as_slice(),
-        &E::standard().decode_ez_str_vec("SGVsbA==").unwrap()
+        &E::standard().decode("SGVsbA==").unwrap()
     );
 }
 
@@ -732,7 +773,7 @@ fn decode_malleability_test_case_2_byte_suffix_valid_two_padding_symbols<E: Engi
 fn decode_malleability_test_case_2_byte_suffix_short_padding<E: EngineWrapper>(engine_wrapper: E) {
     assert_eq!(
         DecodeError::InvalidPadding,
-        E::standard().decode_ez_str_vec("SGVsbA=").unwrap_err()
+        E::standard().decode("SGVsbA=").unwrap_err()
     );
 }
 
@@ -741,7 +782,7 @@ fn decode_malleability_test_case_2_byte_suffix_short_padding<E: EngineWrapper>(e
 fn decode_malleability_test_case_2_byte_suffix_no_padding<E: EngineWrapper>(engine_wrapper: E) {
     assert_eq!(
         DecodeError::InvalidPadding,
-        E::standard().decode_ez_str_vec("SGVsbA").unwrap_err()
+        E::standard().decode("SGVsbA").unwrap_err()
     );
 }
 
@@ -752,7 +793,7 @@ fn decode_malleability_test_case_2_byte_suffix_too_much_padding<E: EngineWrapper
 ) {
     assert_eq!(
         DecodeError::InvalidByte(6, PAD_BYTE),
-        E::standard().decode_ez_str_vec("SGVsbA====").unwrap_err()
+        E::standard().decode("SGVsbA====").unwrap_err()
     );
 }
 
@@ -776,7 +817,7 @@ fn decode_pad_mode_requires_canonical_rejects_non_canonical<E: EngineWrapper>(en
             let mut encoded = "AAAA".repeat(num_prefix_quads);
             encoded.push_str(suffix);
 
-            let res = engine.decode_ez_str_vec(&encoded);
+            let res = engine.decode(&encoded);
 
             assert_eq!(Err(DecodeError::InvalidPadding), res);
         }
@@ -803,7 +844,7 @@ fn decode_pad_mode_requires_no_padding_rejects_any_padding<E: EngineWrapper>(eng
             let mut encoded = "AAAA".repeat(num_prefix_quads);
             encoded.push_str(suffix);
 
-            let res = engine.decode_ez_str_vec(&encoded);
+            let res = engine.decode(&encoded);
 
             assert_eq!(Err(DecodeError::InvalidPadding), res);
         }
@@ -854,7 +895,7 @@ fn decode_pad_byte_in_penultimate_quad_error<E: EngineWrapper>(engine_wrapper: E
                             num_prefix_quads * 4 + num_valid_bytes_penultimate_quad,
                             b'=',
                         ),
-                        engine.decode_ez_str_vec(&s).unwrap_err()
+                        engine.decode(&s).unwrap_err()
                     );
                 }
             }
@@ -888,7 +929,7 @@ fn decode_bytes_after_padding_in_final_quad_error<E: EngineWrapper>(engine_wrapp
                         num_prefix_quads * 4 + (3 - bytes_after_padding),
                         b'='
                     ),
-                    engine.decode_ez_str_vec(&s).unwrap_err()
+                    engine.decode(&s).unwrap_err()
                 );
             }
         }
@@ -908,7 +949,7 @@ fn decode_absurd_pad_error<E: EngineWrapper>(engine_wrapper: E) {
             // first padding byte
             assert_eq!(
                 DecodeError::InvalidByte(num_prefix_quads * 4, b'='),
-                engine.decode_ez_str_vec(&s).unwrap_err()
+                engine.decode(&s).unwrap_err()
             );
         }
     }
@@ -928,14 +969,11 @@ fn decode_too_much_padding_returns_error<E: EngineWrapper>(engine_wrapper: E) {
                 s.push_str(&padding);
 
                 if pad_bytes % 4 == 1 {
-                    assert_eq!(
-                        DecodeError::InvalidLength,
-                        engine.decode_ez_str_vec(&s).unwrap_err()
-                    );
+                    assert_eq!(DecodeError::InvalidLength, engine.decode(&s).unwrap_err());
                 } else {
                     assert_eq!(
                         DecodeError::InvalidByte(num_prefix_quads * 4, b'='),
-                        engine.decode_ez_str_vec(&s).unwrap_err()
+                        engine.decode(&s).unwrap_err()
                     );
                 }
             }
@@ -957,14 +995,11 @@ fn decode_padding_followed_by_non_padding_returns_error<E: EngineWrapper>(engine
                 s.push('E');
 
                 if pad_bytes % 4 == 0 {
-                    assert_eq!(
-                        DecodeError::InvalidLength,
-                        engine.decode_ez_str_vec(&s).unwrap_err()
-                    );
+                    assert_eq!(DecodeError::InvalidLength, engine.decode(&s).unwrap_err());
                 } else {
                     assert_eq!(
                         DecodeError::InvalidByte(num_prefix_quads * 4, b'='),
-                        engine.decode_ez_str_vec(&s).unwrap_err()
+                        engine.decode(&s).unwrap_err()
                     );
                 }
             }
@@ -984,20 +1019,20 @@ fn decode_one_char_in_final_quad_with_padding_error<E: EngineWrapper>(engine_wra
 
             assert_eq!(
                 DecodeError::InvalidByte(num_prefix_quads * 4 + 1, b'='),
-                engine.decode_ez_str_vec(&s).unwrap_err()
+                engine.decode(&s).unwrap_err()
             );
 
             // more padding doesn't change the error
             s.push('=');
             assert_eq!(
                 DecodeError::InvalidByte(num_prefix_quads * 4 + 1, b'='),
-                engine.decode_ez_str_vec(&s).unwrap_err()
+                engine.decode(&s).unwrap_err()
             );
 
             s.push('=');
             assert_eq!(
                 DecodeError::InvalidByte(num_prefix_quads * 4 + 1, b'='),
-                engine.decode_ez_str_vec(&s).unwrap_err()
+                engine.decode(&s).unwrap_err()
             );
         }
     }
@@ -1025,10 +1060,7 @@ fn decode_too_few_symbols_in_final_quad_error<E: EngineWrapper>(engine_wrapper:
                     match final_quad_symbols + padding_symbols {
                         0 => continue,
                         1 => {
-                            assert_eq!(
-                                DecodeError::InvalidLength,
-                                engine.decode_ez_str_vec(&s).unwrap_err()
-                            );
+                            assert_eq!(DecodeError::InvalidLength, engine.decode(&s).unwrap_err());
                         }
                         _ => {
                             // error reported at first padding byte
@@ -1037,7 +1069,7 @@ fn decode_too_few_symbols_in_final_quad_error<E: EngineWrapper>(engine_wrapper:
                                     num_prefix_quads * 4 + final_quad_symbols,
                                     b'=',
                                 ),
-                                engine.decode_ez_str_vec(&s).unwrap_err()
+                                engine.decode(&s).unwrap_err()
                             );
                         }
                     }
@@ -1061,15 +1093,12 @@ fn decode_invalid_trailing_bytes<E: EngineWrapper>(engine_wrapper: E) {
             // message.
             assert_eq!(
                 Err(DecodeError::InvalidByte(num_prefix_quads * 4 + 4, b'\n')),
-                engine.decode_ez_str_vec(&s)
+                engine.decode(&s)
             );
 
             // extra padding, however, is still InvalidLength
             let s = s.replace('\n', "=");
-            assert_eq!(
-                Err(DecodeError::InvalidLength),
-                engine.decode_ez_str_vec(&s)
-            );
+            assert_eq!(Err(DecodeError::InvalidLength), engine.decode(s));
         }
     }
 }
@@ -1090,7 +1119,7 @@ fn decode_wrong_length_error<E: EngineWrapper>(engine_wrapper: E) {
                     s.push('=');
                 }
 
-                let res = engine.decode_ez_str_vec(&s);
+                let res = engine.decode(&s);
                 if num_tokens_final_quad >= 2 {
                     assert!(res.is_ok());
                 } else if num_tokens_final_quad == 1 && num_padding > 0 {
@@ -1112,6 +1141,65 @@ fn decode_wrong_length_error<E: EngineWrapper>(engine_wrapper: E) {
     }
 }
 
+#[apply(all_engines)]
+fn decode_into_slice_fits_in_precisely_sized_slice<E: EngineWrapper>(engine_wrapper: E) {
+    let mut orig_data = Vec::new();
+    let mut encoded_data = String::new();
+    let mut decode_buf = Vec::new();
+
+    let input_len_range = distributions::Uniform::new(0, 1000);
+    let mut rng = rngs::SmallRng::from_entropy();
+
+    for _ in 0..10_000 {
+        orig_data.clear();
+        encoded_data.clear();
+        decode_buf.clear();
+
+        let input_len = input_len_range.sample(&mut rng);
+
+        for _ in 0..input_len {
+            orig_data.push(rng.gen());
+        }
+
+        let engine = E::random(&mut rng);
+        engine.encode_string(&orig_data, &mut encoded_data);
+        assert_encode_sanity(&encoded_data, engine.config().encode_padding(), input_len);
+
+        decode_buf.resize(input_len, 0);
+
+        // decode into a buffer sized to exactly the original data length
+        let decode_bytes_written = engine
+            .decode_slice_unchecked(encoded_data.as_bytes(), &mut decode_buf[..])
+            .unwrap();
+
+        assert_eq!(orig_data.len(), decode_bytes_written);
+        assert_eq!(orig_data, decode_buf);
+    }
+}
+
+#[apply(all_engines)]
+fn decode_length_estimate_delta<E: EngineWrapper>(engine_wrapper: E) {
+    for engine in [E::standard(), E::standard_unpadded()] {
+        for &padding in &[true, false] {
+            for orig_len in 0..1000 {
+                let encoded_len = encoded_len(orig_len, padding).unwrap();
+
+                let decoded_estimate = engine
+                    .internal_decoded_len_estimate(encoded_len)
+                    .decoded_len_estimate();
+                assert!(decoded_estimate >= orig_len); // estimate must never undershoot
+                assert!(
+                    decoded_estimate - orig_len < 3, // may overshoot by at most 2 bytes
+                    "estimate: {}, encoded: {}, orig: {}",
+                    decoded_estimate,
+                    encoded_len,
+                    orig_len
+                );
+            }
+        }
+    }
+}
+
 /// Returns a tuple of the original data length, the encoded data length (just data), and the length including padding.
 ///
 /// Vecs provided should be empty.
@@ -1125,13 +1213,13 @@ fn generate_random_encoded_data<E: Engine, R: rand::Rng, D: distributions::Distr
     let padding: bool = engine.config().encode_padding();
 
     let orig_len = fill_rand(orig_data, rng, length_distribution);
-    let expected_encoded_len = encode::encoded_len(orig_len, padding).unwrap();
+    let expected_encoded_len = encoded_len(orig_len, padding).unwrap();
     encode_buf.resize(expected_encoded_len, 0);
 
-    let base_encoded_len = engine.encode(&orig_data[..], &mut encode_buf[..]);
+    let base_encoded_len = engine.internal_encode(&orig_data[..], &mut encode_buf[..]);
 
     let enc_len_with_padding = if padding {
-        base_encoded_len + encode::add_padding(orig_len, &mut encode_buf[base_encoded_len..])
+        base_encoded_len + add_padding(orig_len, &mut encode_buf[base_encoded_len..])
     } else {
         base_encoded_len
     };
@@ -1199,38 +1287,35 @@ trait EngineWrapper {
     fn random_alphabet<R: rand::Rng>(rng: &mut R, alphabet: &Alphabet) -> Self::Engine;
 }
 
-struct FastPortableWrapper {}
+struct GeneralPurposeWrapper {}
 
-impl EngineWrapper for FastPortableWrapper {
-    type Engine = fast_portable::FastPortable;
+impl EngineWrapper for GeneralPurposeWrapper {
+    type Engine = general_purpose::GeneralPurpose;
 
     fn standard() -> Self::Engine {
-        fast_portable::FastPortable::from(&STANDARD, fast_portable::PAD)
+        general_purpose::GeneralPurpose::new(&STANDARD, general_purpose::PAD)
     }
 
     fn standard_unpadded() -> Self::Engine {
-        fast_portable::FastPortable::from(
-            &STANDARD,
-            fast_portable::NO_PAD.with_decode_padding_mode(DecodePaddingMode::RequireNone),
-        )
+        general_purpose::GeneralPurpose::new(&STANDARD, general_purpose::NO_PAD)
     }
 
     fn standard_with_pad_mode(
         encode_pad: bool,
         decode_pad_mode: DecodePaddingMode,
     ) -> Self::Engine {
-        fast_portable::FastPortable::from(
+        general_purpose::GeneralPurpose::new(
             &STANDARD,
-            fast_portable::FastPortableConfig::new()
+            general_purpose::GeneralPurposeConfig::new()
                 .with_encode_padding(encode_pad)
                 .with_decode_padding_mode(decode_pad_mode),
         )
     }
 
     fn standard_allow_trailing_bits() -> Self::Engine {
-        fast_portable::FastPortable::from(
+        general_purpose::GeneralPurpose::new(
             &STANDARD,
-            fast_portable::FastPortableConfig::new().with_decode_allow_trailing_bits(true),
+            general_purpose::GeneralPurposeConfig::new().with_decode_allow_trailing_bits(true),
         )
     }
 
@@ -1241,7 +1326,7 @@ impl EngineWrapper for FastPortableWrapper {
     }
 
     fn random_alphabet<R: rand::Rng>(rng: &mut R, alphabet: &Alphabet) -> Self::Engine {
-        fast_portable::FastPortable::from(alphabet, random_config(rng))
+        general_purpose::GeneralPurpose::new(alphabet, random_config(rng))
     }
 }
 
@@ -1251,7 +1336,7 @@ impl EngineWrapper for NaiveWrapper {
     type Engine = naive::Naive;
 
     fn standard() -> Self::Engine {
-        naive::Naive::from(
+        naive::Naive::new(
             &STANDARD,
             naive::NaiveConfig {
                 encode_padding: true,
@@ -1262,7 +1347,7 @@ impl EngineWrapper for NaiveWrapper {
     }
 
     fn standard_unpadded() -> Self::Engine {
-        naive::Naive::from(
+        naive::Naive::new(
             &STANDARD,
             naive::NaiveConfig {
                 encode_padding: false,
@@ -1276,7 +1361,7 @@ impl EngineWrapper for NaiveWrapper {
         encode_pad: bool,
         decode_pad_mode: DecodePaddingMode,
     ) -> Self::Engine {
-        naive::Naive::from(
+        naive::Naive::new(
             &STANDARD,
             naive::NaiveConfig {
                 encode_padding: false,
@@ -1287,7 +1372,7 @@ impl EngineWrapper for NaiveWrapper {
     }
 
     fn standard_allow_trailing_bits() -> Self::Engine {
-        naive::Naive::from(
+        naive::Naive::new(
             &STANDARD,
             naive::NaiveConfig {
                 encode_padding: true,
@@ -1316,43 +1401,10 @@ impl EngineWrapper for NaiveWrapper {
             decode_padding_mode: mode,
         };
 
-        naive::Naive::from(alphabet, config)
+        naive::Naive::new(alphabet, config)
     }
 }
 
-trait EngineExtensions: Engine {
-    // a convenience wrapper to avoid the separate estimate call in tests
-    fn decode_ez(&self, input: &[u8], output: &mut [u8]) -> Result<usize, DecodeError> {
-        let estimate = self.decoded_length_estimate(input.len());
-
-        self.decode(input, output, estimate)
-    }
-
-    fn decode_ez_vec(&self, input: &[u8]) -> Result<Vec<u8>, DecodeError> {
-        let mut output = Vec::new();
-        output.resize((input.len() + 3) / 4 * 3, 0_u8);
-
-        self.decode_ez(input, &mut output[..]).map(|len| {
-            // shrink as needed
-            output.resize(len, 0_u8);
-            output
-        })
-    }
-    fn decode_ez_str_vec(&self, input: &str) -> Result<Vec<u8>, DecodeError> {
-        let mut output = Vec::new();
-        output.resize((input.len() + 3) / 4 * 3, 0_u8);
-
-        self.decode_ez(input.as_bytes(), &mut output[..])
-            .map(|len| {
-                // shrink as needed
-                output.resize(len, 0_u8);
-                output
-            })
-    }
-}
-
-impl<E: Engine> EngineExtensions for E {}
-
 fn seeded_rng() -> impl rand::Rng {
     rngs::SmallRng::from_entropy()
 }
@@ -1371,7 +1423,7 @@ fn assert_all_suffixes_ok<E: Engine>(engine: E, suffixes: Vec<&str>) {
             let mut encoded = "AAAA".repeat(num_prefix_quads);
             encoded.push_str(suffix);
 
-            let res = &engine.decode_ez_str_vec(&encoded);
+            let res = &engine.decode(&encoded);
             assert!(res.is_ok());
         }
     }
diff --git a/src/lib.rs b/src/lib.rs
index 505b972..cc9d628 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,57 +1,66 @@
+//! # Getting started
 //!
-//! # Alphabets
+//! 1. Perhaps one of the preconfigured engines in [engine::general_purpose] will suit, e.g.
+//! [engine::general_purpose::STANDARD_NO_PAD].
+//!     - These are re-exported in [prelude] with a `BASE64_` prefix for those who prefer to
+//!       `use base64::prelude::*` or equivalent, e.g. [prelude::BASE64_STANDARD_NO_PAD]
+//! 1. If not, choose which alphabet you want. Most users will want [alphabet::STANDARD] or [alphabet::URL_SAFE].
+//! 1. Choose which [Engine] implementation you want. For the moment there is only one: [engine::GeneralPurpose].
+//! 1. Configure the engine appropriately using the engine's `Config` type.
+//!     - This is where you'll select whether to add padding (when encoding) or expect it (when
+//!     decoding). If given the choice, prefer no padding.
+//! 1. Build the engine using the selected alphabet and config.
+//!
+//! For more detail, see below.
+//!
+//! ## Alphabets
 //!
 //! An [alphabet::Alphabet] defines what ASCII symbols are used to encode to or decode from.
 //!
 //! Constants in [alphabet] like [alphabet::STANDARD] or [alphabet::URL_SAFE] provide commonly used
-//! alphabets, but you can also build your own custom `Alphabet` if needed.
+//! alphabets, but you can also build your own custom [alphabet::Alphabet] if needed.
 //!
-//! # Engines
+//! ## Engines
 //!
 //! Once you have an `Alphabet`, you can pick which `Engine` you want. A few parts of the public
 //! API provide a default, but otherwise the user must provide an `Engine` to use.
 //!
-//! See [engine::Engine] for more on what engine to choose, or use [engine::DEFAULT_ENGINE] if you
-//! just want plain old standard base64 and don't have other requirements.
+//! See [Engine] for more.
 //!
 //! ## Config
 //!
 //! In addition to an `Alphabet`, constructing an `Engine` also requires an [engine::Config]. Each
-//! `Engine` has a corresponding `Config` implementation.
-//!
-//! [encode()] and [decode()] use the standard alphabet and default engine in an RFC 4648 standard
-//! setup.
+//! `Engine` has a corresponding `Config` implementation since different `Engine`s may offer different
+//! levels of configurability.
 //!
 //! # Encoding
 //!
-//! Several different encoding functions are available to you depending on your desire for
+//! Several different encoding methods on [Engine] are available to you depending on your desire for
 //! convenience vs performance.
 //!
-//! | Function                | Output                       | Allocates                      |
-//! | ----------------------- | ---------------------------- | ------------------------------ |
-//! | `encode`                | Returns a new `String`       | Always                         |
-//! | `encode_engine`         | Returns a new `String`       | Always                         |
-//! | `encode_engine_string`     | Appends to provided `String` | Only if `String` needs to grow |
-//! | `encode_engine_slice`   | Writes to provided `&[u8]`   | Never - fastest                |
+//! | Method                   | Output                         | Allocates                      |
+//! | ------------------------ | ------------------------------ | ------------------------------ |
+//! | [Engine::encode]         | Returns a new `String`         | Always                         |
+//! | [Engine::encode_string]  | Appends to provided `String`   | Only if `String` needs to grow |
+//! | [Engine::encode_slice]   | Writes to provided `&mut [u8]` | Never - fastest                |
 //!
-//! All of the encoding functions that take an `Engine` will pad as per the engine's config.
+//! All of the encoding methods will pad as per the engine's config.
 //!
 //! # Decoding
 //!
-//! Just as for encoding, there are different decoding functions available.
+//! Just as for encoding, there are different decoding methods available.
 //!
-//! | Function                | Output                        | Allocates                      |
-//! | ----------------------- | ----------------------------- | ------------------------------ |
-//! | `decode`                | Returns a new `Vec<u8>`       | Always                         |
-//! | `decode_engine`         | Returns a new `Vec<u8>`       | Always                         |
-//! | `decode_engine_vec`     | Appends to provided `Vec<u8>` | Only if `Vec` needs to grow    |
-//! | `decode_engine_slice`   | Writes to provided `&[u8]`    | Never - fastest                |
+//! | Method                   | Output                         | Allocates                      |
+//! | ------------------------ | ------------------------------ | ------------------------------ |
+//! | [Engine::decode]         | Returns a new `Vec<u8>`        | Always                         |
+//! | [Engine::decode_vec]     | Appends to provided `Vec<u8>`  | Only if `Vec` needs to grow    |
+//! | [Engine::decode_slice]   | Writes to provided `&mut [u8]` | Never - fastest                |
 //!
 //! Unlike encoding, where all possible input is valid, decoding can fail (see [DecodeError]).
 //!
-//! Input can be invalid because it has invalid characters or invalid padding. (No padding at all is
-//! valid, but excess padding is not.) Whitespace in the input is invalid, just like any other
-//! non-base64 byte.
+//! Input can be invalid because it has invalid characters or invalid padding. The nature of how
+//! padding is checked depends on the engine's config.
+//! Whitespace in the input is invalid, just like any other non-base64 byte.
 //!
 //! # `Read` and `Write`
 //!
@@ -68,11 +77,47 @@
 //!
 //! See [display] for how to transparently base64 data via a `Display` implementation.
 //!
+//! # Examples
+//!
+//! ## Using predefined engines
+//!
+//! ```
+//! use base64::{Engine as _, engine::general_purpose};
+//!
+//! let orig = b"data";
+//! let encoded: String = general_purpose::STANDARD_NO_PAD.encode(orig);
+//! assert_eq!("ZGF0YQ", encoded);
+//! assert_eq!(orig.as_slice(), &general_purpose::STANDARD_NO_PAD.decode(encoded).unwrap());
+//!
+//! // or, URL-safe
+//! let encoded_url = general_purpose::URL_SAFE_NO_PAD.encode(orig);
+//! ```
+//!
+//! ## Custom alphabet, config, and engine
+//!
+//! ```
+//! use base64::{engine, alphabet, Engine as _};
+//!
+//! // bizarro-world base64: +/ as the first symbols instead of the last
+//! let alphabet =
+//!     alphabet::Alphabet::new("+/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")
+//!     .unwrap();
+//!
+//! // a very weird config that encodes with padding but requires no padding when decoding...?
+//! let crazy_config = engine::GeneralPurposeConfig::new()
+//!     .with_decode_allow_trailing_bits(true)
+//!     .with_encode_padding(true)
+//!     .with_decode_padding_mode(engine::DecodePaddingMode::RequireNone);
+//!
+//! let crazy_engine = engine::GeneralPurpose::new(&alphabet, crazy_config);
+//!
+//! let encoded = crazy_engine.encode(b"abc 123");
+//!
+//! ```
+//!
 //! # Panics
 //!
 //! If length calculations result in overflowing `usize`, a panic will result.
-//!
-//! The `_slice` flavors of encode or decode will panic if the provided output slice is too small.
 
 #![cfg_attr(feature = "cargo-clippy", allow(clippy::cast_lossless))]
 #![deny(
@@ -108,18 +153,25 @@ pub mod read;
 pub mod write;
 
 pub mod engine;
+pub use engine::Engine;
 
 pub mod alphabet;
 
 mod encode;
+#[allow(deprecated)]
 #[cfg(any(feature = "alloc", feature = "std", test))]
 pub use crate::encode::{encode, encode_engine, encode_engine_string};
-pub use crate::encode::{encode_engine_slice, encoded_len};
+#[allow(deprecated)]
+pub use crate::encode::{encode_engine_slice, encoded_len, EncodeSliceError};
 
 mod decode;
+#[allow(deprecated)]
 #[cfg(any(feature = "alloc", feature = "std", test))]
 pub use crate::decode::{decode, decode_engine, decode_engine_vec};
-pub use crate::decode::{decode_engine_slice, DecodeError};
+#[allow(deprecated)]
+pub use crate::decode::{decode_engine_slice, decoded_len_estimate, DecodeError, DecodeSliceError};
+
+pub mod prelude;
 
 #[cfg(test)]
 mod tests;
diff --git a/src/prelude.rs b/src/prelude.rs
new file mode 100644
index 0000000..fbeb5ba
--- /dev/null
+++ b/src/prelude.rs
@@ -0,0 +1,19 @@
+//! Preconfigured engines for common use cases.
+//!
+//! These are re-exports of `const` engines in [crate::engine::general_purpose], renamed with a `BASE64_`
+//! prefix so they stay unambiguous for those who prefer to `use` each engine directly by name.
+//!
+//! # Examples
+//!
+//! ```
+//! use base64::prelude::{Engine as _, BASE64_STANDARD_NO_PAD};
+//!
+//! assert_eq!("c29tZSBieXRlcw", &BASE64_STANDARD_NO_PAD.encode(b"some bytes"));
+//! ```
+
+pub use crate::engine::Engine;
+
+pub use crate::engine::general_purpose::STANDARD as BASE64_STANDARD;
+pub use crate::engine::general_purpose::STANDARD_NO_PAD as BASE64_STANDARD_NO_PAD;
+pub use crate::engine::general_purpose::URL_SAFE as BASE64_URL_SAFE;
+pub use crate::engine::general_purpose::URL_SAFE_NO_PAD as BASE64_URL_SAFE_NO_PAD;
diff --git a/src/read/decoder.rs b/src/read/decoder.rs
index 13370bb..4888c9c 100644
--- a/src/read/decoder.rs
+++ b/src/read/decoder.rs
@@ -1,4 +1,4 @@
-use crate::{decode_engine_slice, engine::Engine, DecodeError};
+use crate::{engine::Engine, DecodeError};
 use std::{cmp, fmt, io};
 
 // This should be large, but it has to fit on the stack.
@@ -15,12 +15,13 @@ const DECODED_CHUNK_SIZE: usize = 3;
 /// ```
 /// use std::io::Read;
 /// use std::io::Cursor;
+/// use base64::engine::general_purpose;
 ///
 /// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
 /// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
-/// let mut decoder = base64::read::DecoderReader::from(
+/// let mut decoder = base64::read::DecoderReader::new(
 ///     &mut wrapped_reader,
-///     &base64::engine::DEFAULT_ENGINE);
+///     &general_purpose::STANDARD);
 ///
 /// // handle errors as you normally would
 /// let mut result = Vec::new();
@@ -69,7 +70,7 @@ impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
 
 impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
     /// Create a new decoder that will read from the provided reader `r`.
-    pub fn from(reader: R, engine: &'e E) -> Self {
+    pub fn new(reader: R, engine: &'e E) -> Self {
         DecoderReader {
             engine,
             inner: reader,
@@ -131,22 +132,24 @@ impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
         debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
         debug_assert!(!buf.is_empty());
 
-        let decoded = decode_engine_slice(
-            &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes],
-            buf,
-            self.engine,
-        )
-        .map_err(|e| match e {
-            DecodeError::InvalidByte(offset, byte) => {
-                DecodeError::InvalidByte(self.total_b64_decoded + offset, byte)
-            }
-            DecodeError::InvalidLength => DecodeError::InvalidLength,
-            DecodeError::InvalidLastSymbol(offset, byte) => {
-                DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte)
-            }
-            DecodeError::InvalidPadding => DecodeError::InvalidPadding,
-        })
-        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+        let decoded = self
+            .engine
+            .internal_decode(
+                &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes],
+                buf,
+                self.engine.internal_decoded_len_estimate(num_bytes),
+            )
+            .map_err(|e| match e {
+                DecodeError::InvalidByte(offset, byte) => {
+                    DecodeError::InvalidByte(self.total_b64_decoded + offset, byte)
+                }
+                DecodeError::InvalidLength => DecodeError::InvalidLength,
+                DecodeError::InvalidLastSymbol(offset, byte) => {
+                    DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte)
+                }
+                DecodeError::InvalidPadding => DecodeError::InvalidPadding,
+            })
+            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
 
         self.total_b64_decoded += num_bytes;
         self.b64_offset += num_bytes;
diff --git a/src/read/decoder_tests.rs b/src/read/decoder_tests.rs
index 3c882b7..65d58d8 100644
--- a/src/read/decoder_tests.rs
+++ b/src/read/decoder_tests.rs
@@ -1,14 +1,17 @@
-use std::io::{self, Read};
+use std::{
+    cmp,
+    io::{self, Read as _},
+    iter,
+};
 
-use rand::{Rng, RngCore};
-use std::{cmp, iter};
+use rand::{Rng as _, RngCore as _};
 
 use super::decoder::{DecoderReader, BUF_SIZE};
-use crate::encode::encode_engine_string;
-use crate::engine::fast_portable::FastPortable;
-use crate::engine::DEFAULT_ENGINE;
-use crate::tests::{random_alphabet, random_config, random_engine};
-use crate::{decode_engine_vec, DecodeError};
+use crate::{
+    engine::{general_purpose::STANDARD, Engine, GeneralPurpose},
+    tests::{random_alphabet, random_config, random_engine},
+    DecodeError,
+};
 
 #[test]
 fn simple() {
@@ -29,7 +32,7 @@ fn simple() {
         // Read n bytes at a time.
         for n in 1..base64data.len() + 1 {
             let mut wrapped_reader = io::Cursor::new(base64data);
-            let mut decoder = DecoderReader::from(&mut wrapped_reader, &DEFAULT_ENGINE);
+            let mut decoder = DecoderReader::new(&mut wrapped_reader, &STANDARD);
 
             // handle errors as you normally would
             let mut text_got = Vec::new();
@@ -61,7 +64,7 @@ fn trailing_junk() {
         // Read n bytes at a time.
         for n in 1..base64data.len() + 1 {
             let mut wrapped_reader = io::Cursor::new(base64data);
-            let mut decoder = DecoderReader::from(&mut wrapped_reader, &DEFAULT_ENGINE);
+            let mut decoder = DecoderReader::new(&mut wrapped_reader, &STANDARD);
 
             // handle errors as you normally would
             let mut buffer = vec![0u8; n];
@@ -101,7 +104,7 @@ fn handles_short_read_from_delegate() {
         assert_eq!(size, bytes.len());
 
         let engine = random_engine(&mut rng);
-        encode_engine_string(&bytes[..], &mut b64, &engine);
+        engine.encode_string(&bytes[..], &mut b64);
 
         let mut wrapped_reader = io::Cursor::new(b64.as_bytes());
         let mut short_reader = RandomShortRead {
@@ -109,7 +112,7 @@ fn handles_short_read_from_delegate() {
             rng: &mut rng,
         };
 
-        let mut decoder = DecoderReader::from(&mut short_reader, &engine);
+        let mut decoder = DecoderReader::new(&mut short_reader, &engine);
 
         let decoded_len = decoder.read_to_end(&mut decoded).unwrap();
         assert_eq!(size, decoded_len);
@@ -139,10 +142,10 @@ fn read_in_short_increments() {
 
         let engine = random_engine(&mut rng);
 
-        encode_engine_string(&bytes[..], &mut b64, &engine);
+        engine.encode_string(&bytes[..], &mut b64);
 
         let mut wrapped_reader = io::Cursor::new(&b64[..]);
-        let mut decoder = DecoderReader::from(&mut wrapped_reader, &engine);
+        let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine);
 
         consume_with_short_reads_and_validate(&mut rng, &bytes[..], &mut decoded, &mut decoder);
     }
@@ -170,10 +173,10 @@ fn read_in_short_increments_with_short_delegate_reads() {
 
         let engine = random_engine(&mut rng);
 
-        encode_engine_string(&bytes[..], &mut b64, &engine);
+        engine.encode_string(&bytes[..], &mut b64);
 
         let mut base_reader = io::Cursor::new(&b64[..]);
-        let mut decoder = DecoderReader::from(&mut base_reader, &engine);
+        let mut decoder = DecoderReader::new(&mut base_reader, &engine);
         let mut short_reader = RandomShortRead {
             delegate: &mut decoder,
             rng: &mut rand::thread_rng(),
@@ -211,8 +214,8 @@ fn reports_invalid_last_symbol_correctly() {
         let config = random_config(&mut rng);
         let alphabet = random_alphabet(&mut rng);
         // changing padding will cause invalid padding errors when we twiddle the last byte
-        let engine = FastPortable::from(alphabet, config.with_encode_padding(false));
-        encode_engine_string(&bytes[..], &mut b64, &engine);
+        let engine = GeneralPurpose::new(alphabet, config.with_encode_padding(false));
+        engine.encode_string(&bytes[..], &mut b64);
         b64_bytes.extend(b64.bytes());
         assert_eq!(b64_bytes.len(), b64.len());
 
@@ -224,10 +227,10 @@ fn reports_invalid_last_symbol_correctly() {
 
             // replace the last
             *b64_bytes.last_mut().unwrap() = s1;
-            let bulk_res = decode_engine_vec(&b64_bytes[..], &mut bulk_decoded, &engine);
+            let bulk_res = engine.decode_vec(&b64_bytes[..], &mut bulk_decoded);
 
             let mut wrapped_reader = io::Cursor::new(&b64_bytes[..]);
-            let mut decoder = DecoderReader::from(&mut wrapped_reader, &engine);
+            let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine);
 
             let stream_res = decoder.read_to_end(&mut decoded).map(|_| ()).map_err(|e| {
                 e.into_inner()
@@ -258,14 +261,14 @@ fn reports_invalid_byte_correctly() {
 
         let engine = random_engine(&mut rng);
 
-        encode_engine_string(&bytes[..], &mut b64, &engine);
+        engine.encode_string(&bytes[..], &mut b64);
         // replace one byte, somewhere, with '*', which is invalid
         let bad_byte_pos = rng.gen_range(0..b64.len());
         let mut b64_bytes = b64.bytes().collect::<Vec<u8>>();
         b64_bytes[bad_byte_pos] = b'*';
 
         let mut wrapped_reader = io::Cursor::new(b64_bytes.clone());
-        let mut decoder = DecoderReader::from(&mut wrapped_reader, &engine);
+        let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine);
 
         // some gymnastics to avoid double-moving the io::Error, which is not Copy
         let read_decode_err = decoder
@@ -281,7 +284,7 @@ fn reports_invalid_byte_correctly() {
             .and_then(|o| o);
 
         let mut bulk_buf = Vec::new();
-        let bulk_decode_err = decode_engine_vec(&b64_bytes[..], &mut bulk_buf, &engine).err();
+        let bulk_decode_err = engine.decode_vec(&b64_bytes[..], &mut bulk_buf).err();
 
         // it's tricky to predict where the invalid data's offset will be since if it's in the last
         // chunk it will be reported at the first padding location because it's treated as invalid
@@ -293,7 +296,7 @@ fn reports_invalid_byte_correctly() {
     }
 }
 
-fn consume_with_short_reads_and_validate<R: Read>(
+fn consume_with_short_reads_and_validate<R: io::Read>(
     rng: &mut rand::rngs::ThreadRng,
     expected_bytes: &[u8],
     decoded: &mut [u8],
diff --git a/src/tests.rs b/src/tests.rs
index bb5458f..7083b54 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -8,11 +8,10 @@ use rand::{
 };
 
 use crate::{
-    alphabet, decode_engine,
+    alphabet,
     encode::encoded_len,
-    encode_engine_string,
     engine::{
-        fast_portable::{FastPortable, FastPortableConfig},
+        general_purpose::{GeneralPurpose, GeneralPurposeConfig},
         Config, DecodePaddingMode, Engine,
     },
 };
@@ -68,17 +67,17 @@ fn roundtrip_random_config(input_len_range: Uniform<usize>, iterations: u32) {
             input_buf.push(rng.gen());
         }
 
-        encode_engine_string(&input_buf, &mut encoded_buf, &engine);
+        engine.encode_string(&input_buf, &mut encoded_buf);
 
         assert_encode_sanity(&encoded_buf, engine.config().encode_padding(), input_len);
 
-        assert_eq!(input_buf, decode_engine(&encoded_buf, &engine).unwrap());
+        assert_eq!(input_buf, engine.decode(&encoded_buf).unwrap());
     }
 }
 
-pub fn random_config<R: Rng>(rng: &mut R) -> FastPortableConfig {
+pub fn random_config<R: Rng>(rng: &mut R) -> GeneralPurposeConfig {
     let mode = rng.gen();
-    FastPortableConfig::new()
+    GeneralPurposeConfig::new()
         .with_encode_padding(match mode {
             DecodePaddingMode::Indifferent => rng.gen(),
             DecodePaddingMode::RequireCanonical => true,
@@ -102,10 +101,10 @@ pub fn random_alphabet<R: Rng>(rng: &mut R) -> &'static alphabet::Alphabet {
     ALPHABETS.choose(rng).unwrap()
 }
 
-pub fn random_engine<R: Rng>(rng: &mut R) -> FastPortable {
+pub fn random_engine<R: Rng>(rng: &mut R) -> GeneralPurpose {
     let alphabet = random_alphabet(rng);
     let config = random_config(rng);
-    FastPortable::from(alphabet, config)
+    GeneralPurpose::new(alphabet, config)
 }
 
 const ALPHABETS: &[alphabet::Alphabet] = &[
diff --git a/src/write/encoder.rs b/src/write/encoder.rs
index a2705e5..1c19bb4 100644
--- a/src/write/encoder.rs
+++ b/src/write/encoder.rs
@@ -1,4 +1,3 @@
-use crate::encode_engine_slice;
 use crate::engine::Engine;
 use std::{
     cmp, fmt, io,
@@ -23,11 +22,10 @@ const MIN_ENCODE_CHUNK_SIZE: usize = 3;
 ///
 /// ```
 /// use std::io::Write;
+/// use base64::engine::general_purpose;
 ///
 /// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc.
-/// let mut enc = base64::write::EncoderWriter::from(
-///     Vec::new(),
-///     &base64::engine::DEFAULT_ENGINE);
+/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), &general_purpose::STANDARD);
 ///
 /// // handle errors as you normally would
 /// enc.write_all(b"asdf").unwrap();
@@ -97,7 +95,7 @@ impl<'e, E: Engine, W: io::Write> fmt::Debug for EncoderWriter<'e, E, W> {
 
 impl<'e, E: Engine, W: io::Write> EncoderWriter<'e, E, W> {
     /// Create a new encoder that will write to the provided delegate writer.
-    pub fn from(delegate: W, engine: &'e E) -> EncoderWriter<'e, E, W> {
+    pub fn new(delegate: W, engine: &'e E) -> EncoderWriter<'e, E, W> {
         EncoderWriter {
             engine,
             delegate: Some(delegate),
@@ -150,11 +148,13 @@ impl<'e, E: Engine, W: io::Write> EncoderWriter<'e, E, W> {
         self.write_all_encoded_output()?;
 
         if self.extra_input_occupied_len > 0 {
-            let encoded_len = encode_engine_slice(
-                &self.extra_input[..self.extra_input_occupied_len],
-                &mut self.output[..],
-                self.engine,
-            );
+            let encoded_len = self
+                .engine
+                .encode_slice(
+                    &self.extra_input[..self.extra_input_occupied_len],
+                    &mut self.output[..],
+                )
+                .expect("buffer is large enough");
 
             self.output_occupied_len = encoded_len;
 
@@ -314,7 +314,7 @@ impl<'e, E: Engine, W: io::Write> io::Write for EncoderWriter<'e, E, W> {
                 self.extra_input[self.extra_input_occupied_len..MIN_ENCODE_CHUNK_SIZE]
                     .copy_from_slice(&input[0..extra_input_read_len]);
 
-                let len = self.engine.encode(
+                let len = self.engine.internal_encode(
                     &self.extra_input[0..MIN_ENCODE_CHUNK_SIZE],
                     &mut self.output[..],
                 );
@@ -362,7 +362,7 @@ impl<'e, E: Engine, W: io::Write> io::Write for EncoderWriter<'e, E, W> {
         debug_assert_eq!(0, max_input_len % MIN_ENCODE_CHUNK_SIZE);
         debug_assert_eq!(0, input_chunks_to_encode_len % MIN_ENCODE_CHUNK_SIZE);
 
-        encoded_size += self.engine.encode(
+        encoded_size += self.engine.internal_encode(
             &input[..(input_chunks_to_encode_len)],
             &mut self.output[encoded_size..],
         );
diff --git a/src/write/encoder_string_writer.rs b/src/write/encoder_string_writer.rs
index 2a531f0..9394dc9 100644
--- a/src/write/encoder_string_writer.rs
+++ b/src/write/encoder_string_writer.rs
@@ -1,7 +1,6 @@
 use super::encoder::EncoderWriter;
 use crate::engine::Engine;
 use std::io;
-use std::io::Write;
 
 /// A `Write` implementation that base64-encodes data using the provided config and accumulates the
 /// resulting base64 utf8 `&str` in a [StrConsumer] implementation (typically `String`), which is
@@ -13,9 +12,9 @@ use std::io::Write;
 ///
 /// ```
 /// use std::io::Write;
+/// use base64::engine::general_purpose;
 ///
-/// let mut enc = base64::write::EncoderStringWriter::from(
-///     &base64::engine::DEFAULT_ENGINE);
+/// let mut enc = base64::write::EncoderStringWriter::new(&general_purpose::STANDARD);
 ///
 /// enc.write_all(b"asdf").unwrap();
 ///
@@ -29,12 +28,13 @@ use std::io::Write;
 ///
 /// ```
 /// use std::io::Write;
+/// use base64::engine::general_purpose;
 ///
 /// let mut buf = String::from("base64: ");
 ///
 /// let mut enc = base64::write::EncoderStringWriter::from_consumer(
 ///     &mut buf,
-///     &base64::engine::DEFAULT_ENGINE);
+///     &general_purpose::STANDARD);
 ///
 /// enc.write_all(b"asdf").unwrap();
 ///
@@ -61,7 +61,7 @@ impl<'e, E: Engine, S: StrConsumer> EncoderStringWriter<'e, E, S> {
     /// Create a EncoderStringWriter that will append to the provided `StrConsumer`.
     pub fn from_consumer(str_consumer: S, engine: &'e E) -> Self {
         EncoderStringWriter {
-            encoder: EncoderWriter::from(Utf8SingleCodeUnitWriter { str_consumer }, engine),
+            encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_consumer }, engine),
         }
     }
 
@@ -79,12 +79,12 @@ impl<'e, E: Engine, S: StrConsumer> EncoderStringWriter<'e, E, S> {
 
 impl<'e, E: Engine> EncoderStringWriter<'e, E, String> {
     /// Create a EncoderStringWriter that will encode into a new `String` with the provided config.
-    pub fn from(engine: &'e E) -> Self {
+    pub fn new(engine: &'e E) -> Self {
         EncoderStringWriter::from_consumer(String::new(), engine)
     }
 }
 
-impl<'e, E: Engine, S: StrConsumer> Write for EncoderStringWriter<'e, E, S> {
+impl<'e, E: Engine, S: StrConsumer> io::Write for EncoderStringWriter<'e, E, S> {
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
         self.encoder.write(buf)
     }
@@ -140,9 +140,9 @@ impl<S: StrConsumer> io::Write for Utf8SingleCodeUnitWriter<S> {
 
 #[cfg(test)]
 mod tests {
-    use crate::encode_engine_string;
-    use crate::tests::random_engine;
-    use crate::write::encoder_string_writer::EncoderStringWriter;
+    use crate::{
+        engine::Engine, tests::random_engine, write::encoder_string_writer::EncoderStringWriter,
+    };
     use rand::Rng;
     use std::io::Write;
 
@@ -163,9 +163,9 @@ mod tests {
             }
 
             let engine = random_engine(&mut rng);
-            encode_engine_string(&orig_data, &mut normal_encoded, &engine);
+            engine.encode_string(&orig_data, &mut normal_encoded);
 
-            let mut stream_encoder = EncoderStringWriter::from(&engine);
+            let mut stream_encoder = EncoderStringWriter::new(&engine);
             // Write the first i bytes, then the rest
             stream_encoder.write_all(&orig_data[0..i]).unwrap();
             stream_encoder.write_all(&orig_data[i..]).unwrap();
diff --git a/src/write/encoder_tests.rs b/src/write/encoder_tests.rs
index 9f875d8..ce76d63 100644
--- a/src/write/encoder_tests.rs
+++ b/src/write/encoder_tests.rs
@@ -3,36 +3,37 @@ use std::{cmp, io, str};
 
 use rand::Rng;
 
-use crate::alphabet::{STANDARD, URL_SAFE};
-use crate::engine::fast_portable::{FastPortable, NO_PAD, PAD};
-use crate::tests::random_engine;
-use crate::{encode_engine, encode_engine_string};
+use crate::{
+    alphabet::{STANDARD, URL_SAFE},
+    engine::{
+        general_purpose::{GeneralPurpose, NO_PAD, PAD},
+        Engine,
+    },
+    tests::random_engine,
+};
 
 use super::EncoderWriter;
 
-const URL_SAFE_ENGINE: FastPortable = FastPortable::from(&URL_SAFE, PAD);
-const NO_PAD_ENGINE: FastPortable = FastPortable::from(&STANDARD, NO_PAD);
+const URL_SAFE_ENGINE: GeneralPurpose = GeneralPurpose::new(&URL_SAFE, PAD);
+const NO_PAD_ENGINE: GeneralPurpose = GeneralPurpose::new(&STANDARD, NO_PAD);
 
 #[test]
 fn encode_three_bytes() {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &URL_SAFE_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
 
         let sz = enc.write(b"abc").unwrap();
         assert_eq!(sz, 3);
     }
-    assert_eq!(
-        &c.get_ref()[..],
-        encode_engine("abc", &URL_SAFE_ENGINE).as_bytes()
-    );
+    assert_eq!(&c.get_ref()[..], URL_SAFE_ENGINE.encode("abc").as_bytes());
 }
 
 #[test]
 fn encode_nine_bytes_two_writes() {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &URL_SAFE_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
 
         let sz = enc.write(b"abcdef").unwrap();
         assert_eq!(sz, 6);
@@ -41,7 +42,7 @@ fn encode_nine_bytes_two_writes() {
     }
     assert_eq!(
         &c.get_ref()[..],
-        encode_engine("abcdefghi", &URL_SAFE_ENGINE).as_bytes()
+        URL_SAFE_ENGINE.encode("abcdefghi").as_bytes()
     );
 }
 
@@ -49,24 +50,21 @@ fn encode_nine_bytes_two_writes() {
 fn encode_one_then_two_bytes() {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &URL_SAFE_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
 
         let sz = enc.write(b"a").unwrap();
         assert_eq!(sz, 1);
         let sz = enc.write(b"bc").unwrap();
         assert_eq!(sz, 2);
     }
-    assert_eq!(
-        &c.get_ref()[..],
-        encode_engine("abc", &URL_SAFE_ENGINE).as_bytes()
-    );
+    assert_eq!(&c.get_ref()[..], URL_SAFE_ENGINE.encode("abc").as_bytes());
 }
 
 #[test]
 fn encode_one_then_five_bytes() {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &URL_SAFE_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
 
         let sz = enc.write(b"a").unwrap();
         assert_eq!(sz, 1);
@@ -75,7 +73,7 @@ fn encode_one_then_five_bytes() {
     }
     assert_eq!(
         &c.get_ref()[..],
-        encode_engine("abcdef", &URL_SAFE_ENGINE).as_bytes()
+        URL_SAFE_ENGINE.encode("abcdef").as_bytes()
     );
 }
 
@@ -83,7 +81,7 @@ fn encode_one_then_five_bytes() {
 fn encode_1_2_3_bytes() {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &URL_SAFE_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
 
         let sz = enc.write(b"a").unwrap();
         assert_eq!(sz, 1);
@@ -94,7 +92,7 @@ fn encode_1_2_3_bytes() {
     }
     assert_eq!(
         &c.get_ref()[..],
-        encode_engine("abcdef", &URL_SAFE_ENGINE).as_bytes()
+        URL_SAFE_ENGINE.encode("abcdef").as_bytes()
     );
 }
 
@@ -102,23 +100,20 @@ fn encode_1_2_3_bytes() {
 fn encode_with_padding() {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &URL_SAFE_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
 
         enc.write_all(b"abcd").unwrap();
 
         enc.flush().unwrap();
     }
-    assert_eq!(
-        &c.get_ref()[..],
-        encode_engine("abcd", &URL_SAFE_ENGINE).as_bytes()
-    );
+    assert_eq!(&c.get_ref()[..], URL_SAFE_ENGINE.encode("abcd").as_bytes());
 }
 
 #[test]
 fn encode_with_padding_multiple_writes() {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &URL_SAFE_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
 
         assert_eq!(1, enc.write(b"a").unwrap());
         assert_eq!(2, enc.write(b"bc").unwrap());
@@ -129,7 +124,7 @@ fn encode_with_padding_multiple_writes() {
     }
     assert_eq!(
         &c.get_ref()[..],
-        encode_engine("abcdefg", &URL_SAFE_ENGINE).as_bytes()
+        URL_SAFE_ENGINE.encode("abcdefg").as_bytes()
     );
 }
 
@@ -137,7 +132,7 @@ fn encode_with_padding_multiple_writes() {
 fn finish_writes_extra_byte() {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &URL_SAFE_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE);
 
         assert_eq!(6, enc.write(b"abcdef").unwrap());
 
@@ -149,7 +144,7 @@ fn finish_writes_extra_byte() {
     }
     assert_eq!(
         &c.get_ref()[..],
-        encode_engine("abcdefg", &URL_SAFE_ENGINE).as_bytes()
+        URL_SAFE_ENGINE.encode("abcdefg").as_bytes()
     );
 }
 
@@ -157,17 +152,14 @@ fn finish_writes_extra_byte() {
 fn write_partial_chunk_encodes_partial_chunk() {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &NO_PAD_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
 
         // nothing encoded yet
         assert_eq!(2, enc.write(b"ab").unwrap());
         // encoded here
         let _ = enc.finish().unwrap();
     }
-    assert_eq!(
-        &c.get_ref()[..],
-        encode_engine("ab", &NO_PAD_ENGINE).as_bytes()
-    );
+    assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("ab").as_bytes());
     assert_eq!(3, c.get_ref().len());
 }
 
@@ -175,15 +167,12 @@ fn write_partial_chunk_encodes_partial_chunk() {
 fn write_1_chunk_encodes_complete_chunk() {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &NO_PAD_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
 
         assert_eq!(3, enc.write(b"abc").unwrap());
         let _ = enc.finish().unwrap();
     }
-    assert_eq!(
-        &c.get_ref()[..],
-        encode_engine("abc", &NO_PAD_ENGINE).as_bytes()
-    );
+    assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes());
     assert_eq!(4, c.get_ref().len());
 }
 
@@ -191,16 +180,13 @@ fn write_1_chunk_encodes_complete_chunk() {
 fn write_1_chunk_and_partial_encodes_only_complete_chunk() {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &NO_PAD_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
 
         // "d" not consumed since it's not a full chunk
         assert_eq!(3, enc.write(b"abcd").unwrap());
         let _ = enc.finish().unwrap();
     }
-    assert_eq!(
-        &c.get_ref()[..],
-        encode_engine("abc", &NO_PAD_ENGINE).as_bytes()
-    );
+    assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes());
     assert_eq!(4, c.get_ref().len());
 }
 
@@ -208,16 +194,13 @@ fn write_1_chunk_and_partial_encodes_only_complete_chunk() {
 fn write_2_partials_to_exactly_complete_chunk_encodes_complete_chunk() {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &NO_PAD_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
 
         assert_eq!(1, enc.write(b"a").unwrap());
         assert_eq!(2, enc.write(b"bc").unwrap());
         let _ = enc.finish().unwrap();
     }
-    assert_eq!(
-        &c.get_ref()[..],
-        encode_engine("abc", &NO_PAD_ENGINE).as_bytes()
-    );
+    assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes());
     assert_eq!(4, c.get_ref().len());
 }
 
@@ -226,17 +209,14 @@ fn write_partial_then_enough_to_complete_chunk_but_not_complete_another_chunk_en
 ) {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &NO_PAD_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
 
         assert_eq!(1, enc.write(b"a").unwrap());
         // doesn't consume "d"
         assert_eq!(2, enc.write(b"bcd").unwrap());
         let _ = enc.finish().unwrap();
     }
-    assert_eq!(
-        &c.get_ref()[..],
-        encode_engine("abc", &NO_PAD_ENGINE).as_bytes()
-    );
+    assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes());
     assert_eq!(4, c.get_ref().len());
 }
 
@@ -244,17 +224,14 @@ fn write_partial_then_enough_to_complete_chunk_but_not_complete_another_chunk_en
 fn write_partial_then_enough_to_complete_chunk_and_another_chunk_encodes_complete_chunks() {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &NO_PAD_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
 
         assert_eq!(1, enc.write(b"a").unwrap());
         // completes partial chunk, and another chunk
         assert_eq!(5, enc.write(b"bcdef").unwrap());
         let _ = enc.finish().unwrap();
     }
-    assert_eq!(
-        &c.get_ref()[..],
-        encode_engine("abcdef", &NO_PAD_ENGINE).as_bytes()
-    );
+    assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abcdef").as_bytes());
     assert_eq!(8, c.get_ref().len());
 }
 
@@ -263,7 +240,7 @@ fn write_partial_then_enough_to_complete_chunk_and_another_chunk_and_another_par
 ) {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &NO_PAD_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
 
         assert_eq!(1, enc.write(b"a").unwrap());
         // completes partial chunk, and another chunk, with one more partial chunk that's not
@@ -271,10 +248,7 @@ fn write_partial_then_enough_to_complete_chunk_and_another_chunk_and_another_par
         assert_eq!(5, enc.write(b"bcdefe").unwrap());
         let _ = enc.finish().unwrap();
     }
-    assert_eq!(
-        &c.get_ref()[..],
-        encode_engine("abcdef", &NO_PAD_ENGINE).as_bytes()
-    );
+    assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abcdef").as_bytes());
     assert_eq!(8, c.get_ref().len());
 }
 
@@ -282,13 +256,10 @@ fn write_partial_then_enough_to_complete_chunk_and_another_chunk_and_another_par
 fn drop_calls_finish_for_you() {
     let mut c = Cursor::new(Vec::new());
     {
-        let mut enc = EncoderWriter::from(&mut c, &NO_PAD_ENGINE);
+        let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE);
         assert_eq!(1, enc.write(b"a").unwrap());
     }
-    assert_eq!(
-        &c.get_ref()[..],
-        encode_engine("a", &NO_PAD_ENGINE).as_bytes()
-    );
+    assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("a").as_bytes());
     assert_eq!(2, c.get_ref().len());
 }
 
@@ -311,10 +282,10 @@ fn every_possible_split_of_input() {
         }
 
         let engine = random_engine(&mut rng);
-        encode_engine_string(&orig_data, &mut normal_encoded, &engine);
+        engine.encode_string(&orig_data, &mut normal_encoded);
 
         {
-            let mut stream_encoder = EncoderWriter::from(&mut stream_encoded, &engine);
+            let mut stream_encoder = EncoderWriter::new(&mut stream_encoded, &engine);
             // Write the first i bytes, then the rest
             stream_encoder.write_all(&orig_data[0..i]).unwrap();
             stream_encoder.write_all(&orig_data[i..]).unwrap();
@@ -354,7 +325,7 @@ fn retrying_writes_that_error_with_interrupted_works() {
 
         // encode the normal way
         let engine = random_engine(&mut rng);
-        encode_engine_string(&orig_data, &mut normal_encoded, &engine);
+        engine.encode_string(&orig_data, &mut normal_encoded);
 
         // encode via the stream encoder
         {
@@ -365,7 +336,7 @@ fn retrying_writes_that_error_with_interrupted_works() {
                 fraction: 0.8,
             };
 
-            let mut stream_encoder = EncoderWriter::from(&mut interrupting_writer, &engine);
+            let mut stream_encoder = EncoderWriter::new(&mut interrupting_writer, &engine);
             let mut bytes_consumed = 0;
             while bytes_consumed < orig_len {
                 // use short inputs since we want to use `extra` a lot as that's what needs rollback
@@ -418,7 +389,7 @@ fn writes_that_only_write_part_of_input_and_sometimes_interrupt_produce_correct_
 
         // encode the normal way
         let engine = random_engine(&mut rng);
-        encode_engine_string(&orig_data, &mut normal_encoded, &engine);
+        engine.encode_string(&orig_data, &mut normal_encoded);
 
         // encode via the stream encoder
         {
@@ -430,7 +401,7 @@ fn writes_that_only_write_part_of_input_and_sometimes_interrupt_produce_correct_
                 no_interrupt_fraction: 0.1,
             };
 
-            let mut stream_encoder = EncoderWriter::from(&mut partial_writer, &engine);
+            let mut stream_encoder = EncoderWriter::new(&mut partial_writer, &engine);
             let mut bytes_consumed = 0;
             while bytes_consumed < orig_len {
                 // use at most medium-length inputs to exercise retry logic more aggressively
@@ -497,11 +468,11 @@ fn do_encode_random_config_matches_normal_encode(max_input_len: usize) {
 
         // encode the normal way
         let engine = random_engine(&mut rng);
-        encode_engine_string(&orig_data, &mut normal_encoded, &engine);
+        engine.encode_string(&orig_data, &mut normal_encoded);
 
         // encode via the stream encoder
         {
-            let mut stream_encoder = EncoderWriter::from(&mut stream_encoded, &engine);
+            let mut stream_encoder = EncoderWriter::new(&mut stream_encoded, &engine);
             let mut bytes_consumed = 0;
             while bytes_consumed < orig_len {
                 let input_len: usize =
diff --git a/src/write/mod.rs b/src/write/mod.rs
index ef9be61..2a617db 100644
--- a/src/write/mod.rs
+++ b/src/write/mod.rs
@@ -1,9 +1,11 @@
 //! Implementations of `io::Write` to transparently handle base64.
 mod encoder;
 mod encoder_string_writer;
-pub use self::encoder::EncoderWriter;
-pub use self::encoder_string_writer::EncoderStringWriter;
-pub use self::encoder_string_writer::StrConsumer;
+
+pub use self::{
+    encoder::EncoderWriter,
+    encoder_string_writer::{EncoderStringWriter, StrConsumer},
+};
 
 #[cfg(test)]
 mod encoder_tests;
diff --git a/tests/encode.rs b/tests/encode.rs
index 80c19d2..2e1f893 100644
--- a/tests/encode.rs
+++ b/tests/encode.rs
@@ -1,9 +1,9 @@
-use base64::alphabet::URL_SAFE;
-use base64::engine::fast_portable::PAD;
-use base64::*;
+use base64::{
+    alphabet::URL_SAFE, engine::general_purpose::PAD, engine::general_purpose::STANDARD, *,
+};
 
 fn compare_encode(expected: &str, target: &[u8]) {
-    assert_eq!(expected, encode(target));
+    assert_eq!(expected, STANDARD.encode(target));
 }
 
 #[test]
@@ -55,9 +55,6 @@ fn encode_all_bytes_url() {
          -AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq\
          -wsbKztLW2t7i5uru8vb6_wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t_g4eLj5OXm5-jp6uvs7e7v8PHy\
          8_T19vf4-fr7_P3-_w==",
-        encode_engine(
-            &bytes,
-            &engine::fast_portable::FastPortable::from(&URL_SAFE, PAD),
-        )
+        &engine::GeneralPurpose::new(&URL_SAFE, PAD).encode(&bytes)
     );
 }
diff --git a/tests/tests.rs b/tests/tests.rs
index ce2521b..eceff40 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -1,10 +1,9 @@
 use rand::{Rng, SeedableRng};
 
-use base64::engine::{Engine, DEFAULT_ENGINE};
+use base64::engine::{general_purpose::STANDARD, Engine};
 use base64::*;
 
-use base64::alphabet::STANDARD;
-use base64::engine::fast_portable::{FastPortable, NO_PAD};
+use base64::engine::general_purpose::{GeneralPurpose, NO_PAD};
 
 // generate random contents of the specified length and test encode/decode roundtrip
 fn roundtrip_random<E: Engine>(
@@ -28,8 +27,8 @@ fn roundtrip_random<E: Engine>(
             byte_buf.push(r.gen::<u8>());
         }
 
-        encode_engine_string(&byte_buf, str_buf, engine);
-        decode_engine_vec(&str_buf, &mut decode_buf, engine).unwrap();
+        engine.encode_string(&byte_buf, str_buf);
+        engine.decode_vec(&str_buf, &mut decode_buf).unwrap();
 
         assert_eq!(byte_buf, &decode_buf);
     }
@@ -56,14 +55,7 @@ fn roundtrip_random_short_standard() {
     let mut str_buf = String::new();
 
     for input_len in 0..40 {
-        roundtrip_random(
-            &mut byte_buf,
-            &mut str_buf,
-            &DEFAULT_ENGINE,
-            input_len,
-            4,
-            10000,
-        );
+        roundtrip_random(&mut byte_buf, &mut str_buf, &STANDARD, input_len, 4, 10000);
     }
 }
 
@@ -73,14 +65,7 @@ fn roundtrip_random_with_fast_loop_standard() {
     let mut str_buf = String::new();
 
     for input_len in 40..100 {
-        roundtrip_random(
-            &mut byte_buf,
-            &mut str_buf,
-            &DEFAULT_ENGINE,
-            input_len,
-            4,
-            1000,
-        );
+        roundtrip_random(&mut byte_buf, &mut str_buf, &STANDARD, input_len, 4, 1000);
     }
 }
 
@@ -89,7 +74,7 @@ fn roundtrip_random_short_no_padding() {
     let mut byte_buf: Vec<u8> = Vec::new();
     let mut str_buf = String::new();
 
-    let engine = FastPortable::from(&STANDARD, NO_PAD);
+    let engine = GeneralPurpose::new(&alphabet::STANDARD, NO_PAD);
     for input_len in 0..40 {
         roundtrip_random(&mut byte_buf, &mut str_buf, &engine, input_len, 4, 10000);
     }
@@ -100,7 +85,7 @@ fn roundtrip_random_no_padding() {
     let mut byte_buf: Vec<u8> = Vec::new();
     let mut str_buf = String::new();
 
-    let engine = FastPortable::from(&STANDARD, NO_PAD);
+    let engine = GeneralPurpose::new(&alphabet::STANDARD, NO_PAD);
 
     for input_len in 40..100 {
         roundtrip_random(&mut byte_buf, &mut str_buf, &engine, input_len, 4, 1000);
@@ -120,11 +105,11 @@ fn roundtrip_decode_trailing_10_bytes() {
         let mut s: String = "ABCD".repeat(num_quads);
         s.push_str("EFGHIJKLZg");
 
-        let engine = FastPortable::from(&STANDARD, NO_PAD);
-        let decoded = decode_engine(&s, &engine).unwrap();
+        let engine = GeneralPurpose::new(&alphabet::STANDARD, NO_PAD);
+        let decoded = engine.decode(&s).unwrap();
         assert_eq!(num_quads * 3 + 7, decoded.len());
 
-        assert_eq!(s, encode_engine(&decoded, &engine));
+        assert_eq!(s, engine.encode(&decoded));
     }
 }
 
@@ -138,21 +123,39 @@ fn display_wrapper_matches_normal_encode() {
     bytes.push(255);
 
     assert_eq!(
-        encode(&bytes),
-        format!("{}", display::Base64Display::from(&bytes, &DEFAULT_ENGINE))
+        STANDARD.encode(&bytes),
+        format!("{}", display::Base64Display::new(&bytes, &STANDARD))
     );
 }
 
 #[test]
-#[should_panic(expected = "index 24 out of range for slice of length 22")]
-fn encode_engine_slice_panics_when_buffer_too_small() {
-    let mut buf: [u8; 22] = [0; 22];
-    let mut input: [u8; 16] = [0; 16];
-
-    let mut rng = rand::rngs::SmallRng::from_entropy();
-    for elt in &mut input {
-        *elt = rng.gen();
+fn encode_engine_slice_error_when_buffer_too_small() {
+    for num_triples in 1..100 {
+        let input = "AAA".repeat(num_triples);
+        let mut vec = vec![0; (num_triples - 1) * 4];
+        assert_eq!(
+            EncodeSliceError::OutputSliceTooSmall,
+            STANDARD.encode_slice(&input, &mut vec).unwrap_err()
+        );
+        vec.push(0);
+        assert_eq!(
+            EncodeSliceError::OutputSliceTooSmall,
+            STANDARD.encode_slice(&input, &mut vec).unwrap_err()
+        );
+        vec.push(0);
+        assert_eq!(
+            EncodeSliceError::OutputSliceTooSmall,
+            STANDARD.encode_slice(&input, &mut vec).unwrap_err()
+        );
+        vec.push(0);
+        assert_eq!(
+            EncodeSliceError::OutputSliceTooSmall,
+            STANDARD.encode_slice(&input, &mut vec).unwrap_err()
+        );
+        vec.push(0);
+        assert_eq!(
+            num_triples * 4,
+            STANDARD.encode_slice(&input, &mut vec).unwrap()
+        );
     }
-
-    encode_engine_slice(input, &mut buf, &DEFAULT_ENGINE);
 }