update docs

BLAKE3-team · Sep 11, 2023 · c4f91a1 · c4f91a1
1 parent 9c07dc5
commit c4f91a1
Show file tree

Hide file tree

Showing 3 changed files with 71 additions and 30 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -13,7 +13,7 @@ edition = "2021"
 default = ["std"]
 
 # Helper functions for high-performance streaming and memory mapping.
-io = ["memmap2", "rayon", "std"]
+io = ["memmap2", "std"]
 
 # The NEON implementation does not participate in dynamic feature detection,
 # which is currently x86-only. If "neon" is on, NEON support is assumed. Note
@@ -83,8 +83,8 @@ no_neon = []
 zeroize = ["zeroize_crate", "arrayvec/zeroize"]
 
 [package.metadata.docs.rs]
-# Document Hasher::update_rayon on docs.rs.
-features = ["rayon"]
+# Document Hasher::update_rayon and the io module on docs.rs.
+features = ["io", "rayon"]
 
 [dependencies]
 arrayref = "0.3.5"

diff --git a/src/io.rs b/src/io.rs
@@ -1,31 +1,33 @@
 //! Helper functions for efficient IO.
 //!
-//! # Examples
-//!
-//! ```no_run
-//! use std::io;
-//!
-//! use blake3::io::hash_path_maybe_mmap;
-//!
-//! fn main() -> io::Result<()> {
-//!     let args: Vec<_> = std::env::args_os().collect();
-//!     assert_eq!(args.len(), 2);
-//!     let path = &args[1];
-//!     let mut hasher = blake3::Hasher::new();
-//!     hash_path_maybe_mmap(&mut hasher, path)?;
-//!     println!("{}", hasher.finalize());
-//!     Ok(())
-//! }
-//! ```
+//! This module is gated by the `io` Cargo feature, which is disabled by default but enabled on
+//! [docs.rs](https://docs.rs). Note that the [`update_path_rayon`](update_path_rayon) function in
+//! this module also requires the `rayon` Cargo feature.
 
 use std::{fs::File, io, path::Path};
 
 /// Copy from `reader` to `hasher`, returning the number of bytes read.
 ///
-/// A 16 KiB buffer is enough to take advantage of all the SIMD instruction sets
-/// that we support, but `std::io::copy` currently uses 8 KiB. Most platforms
-/// can support at least 64 KiB, and there's some performance benefit to using
-/// bigger reads, so that's what we use here.
+/// A 16 KiB buffer is enough to take advantage of all the SIMD instruction sets that we support,
+/// but [`std::io::copy`](https://doc.rust-lang.org/std/io/fn.copy.html) currently uses 8 KiB. Most
+/// platforms can support at least 64 KiB, and there's some performance benefit to using bigger
+/// reads, so that's what we use here. The exact buffer size used by this function may be different
+/// for different targets and may change at any time. The only guarantee is that it will be large
+/// enough for all of this crate's SIMD implementations on the current platform.
+///
+/// # Example
+///
+/// ```no_run
+/// # use std::fs::File;
+/// # use std::io;
+/// # fn main() -> io::Result<()> {
+/// let file = File::open("file.txt")?;
+/// let mut hasher = blake3::Hasher::new();
+/// blake3::io::copy_wide(file, &mut hasher)?;
+/// println!("{}", hasher.finalize());
+/// # Ok(())
+/// # }
+/// ```
 pub fn copy_wide(
     mut reader: impl std::io::Read,
     hasher: &mut crate::Hasher,
@@ -105,12 +107,26 @@ fn maybe_memmap_file(file: &File) -> io::Result<Option<memmap2::Mmap>> {
 /// my experience, memory mapping improves single-threaded performance by ~10% for large files that
 /// are already in cache. This probably varies considerably between platforms, and as always it's a
 /// good idea to benchmark your own use case. In comparison, the multithreaded
-/// [`update_path_rayon`] function can have a much larger impact on performance.
+/// [`update_path_rayon`](update_path_rayon) function can have a much larger impact on performance.
 ///
 /// Not all files can be memory mapped, and memory mapping very small files can be slower than
 /// reading them the usual way. In those cases, this function will fall back to standard IO. The
 /// heuristic for whether to use memory mapping is currently very simple (file size >= 16 KiB), and
 /// it might change at any time.
+///
+/// # Example
+///
+/// ```no_run
+/// # use std::io;
+/// # use std::path::Path;
+/// # fn main() -> io::Result<()> {
+/// let path = Path::new("big_file.dat");
+/// let mut hasher = blake3::Hasher::new();
+/// blake3::io::update_path(&mut hasher, path)?;
+/// println!("{}", hasher.finalize());
+/// # Ok(())
+/// # }
+/// ```
 pub fn update_path(hasher: &mut crate::Hasher, path: impl AsRef<Path>) -> io::Result<()> {
     let file = File::open(path.as_ref())?;
     if let Some(mmap) = maybe_memmap_file(&file)? {
@@ -124,6 +140,9 @@ pub fn update_path(hasher: &mut crate::Hasher, path: impl AsRef<Path>) -> io::Re
 /// Update a [`Hasher`](crate::Hasher) with the contents of a file using memory mapping and
 /// multithreading.
 ///
+/// This function is gated by the `rayon` Cargo feature, which is disabled by default but enabled
+/// on [docs.rs](https://docs.rs).
+///
 /// This function is to [`update_path`] as [`Hasher::update_rayon`](crate::Hasher::update_rayon) is
 /// to [`Hasher::update`](crate::Hasher::update). The memory mapping behavior is the same, and the
 /// difference is that the mapped buffer is hashed with `update_rayon` internally. This strategy is
@@ -139,6 +158,23 @@ pub fn update_path(hasher: &mut crate::Hasher, path: impl AsRef<Path>) -> io::Re
 /// performance. If your code runs in different environments that you don't control or can't
 /// measure, then unfortunately there's no one-size-fits-all answer for whether multithreading is a
 /// good idea.
+///
+/// # Example
+///
+/// ```no_run
+/// # use std::io;
+/// # use std::path::Path;
+/// # fn main() -> io::Result<()> {
+/// # #[cfg(feature = "rayon")]
+/// # {
+/// let path = Path::new("big_file.dat");
+/// let mut hasher = blake3::Hasher::new();
+/// blake3::io::update_path_rayon(&mut hasher, path)?;
+/// println!("{}", hasher.finalize());
+/// # }
+/// # Ok(())
+/// # }
+/// ```
 #[cfg(feature = "rayon")]
 pub fn update_path_rayon(hasher: &mut crate::Hasher, path: impl AsRef<Path>) -> io::Result<()> {
     let file = File::open(path.as_ref())?;

diff --git a/src/lib.rs b/src/lib.rs
@@ -56,6 +56,9 @@
 //! it should expect breaking changes between patch versions. (The "-preview"
 //! feature name follows the conventions of the RustCrypto [`signature`] crate.)
 //!
+//! The `io` feature enables the [`io`](io) module, which provides helper
+//! functions for memory mapping and other efficient IO operations.
+//!
 //! [`Hasher::update_rayon`]: struct.Hasher.html#method.update_rayon
 //! [BLAKE3]: https://blake3.io
 //! [Rayon]: https://github.com/rayon-rs/rayon
@@ -1113,11 +1116,13 @@ impl Hasher {
     ///
     /// Memory mapping an entire input file is a simple way to take advantage of
     /// multithreading without needing to carefully tune your buffer size or
-    /// offload IO. However, on spinning disks where random access is expensive,
-    /// that approach can lead to disk thrashing and terrible IO performance.
-    /// Note that OS page caching can mask this problem, in which case it might
-    /// only appear for files larger than available RAM. Again, benchmarking
-    /// your specific use case is important.
+    /// offload IO. [`io::update_path_rayon`](io::update_path_rayon) is a helper
+    /// function that does this memory mapping for you. However, on spinning
+    /// disks where random access is expensive, that approach can lead to disk
+    /// thrashing and terrible IO performance. Note that OS page caching can
+    /// mask this problem, in which case it might only appear for files larger
+    /// than available RAM. Again, benchmarking your specific use case is
+    /// important.
     #[cfg(feature = "rayon")]
     pub fn update_rayon(&mut self, input: &[u8]) -> &mut Self {
         self.update_with_join::<join::RayonJoin>(input)