From 905778b7344e6a4f0b569b93d18d1cc6a8fb4ff7 Mon Sep 17 00:00:00 2001 From: Sean Olson Date: Wed, 6 Mar 2024 13:28:12 +0100 Subject: [PATCH] Introduce a more sophisticated `DepthBehavior` type. This change replaces the simple `usize` of the `WalkBehavior::depth` field with a much richer `DepthBehavior` type. `DepthBehavior` supports both minimum and maximum depth bounds and is well-typed to provide better APIs and conversions. Perhaps more importantly, this change removes much of the awareness of invariant prefixes and adjusted roots in `Glob`s from the base walk implementation over paths. Root and relative paths are now more consistent, referring to the walked `Path` or, similarly, the path given to `Glob::walk` functions. The only exceptions are `Glob`s that have a root, for which the root is formed from the invariant prefix (because a rooted prefix escapes the path given to `Glob::walk` functions). Depth too is now more consistently applied from this root. Prefixes in `Glob`s are no longer exposed in walk APIs (`GlobWalker` has been removed from public APIs). To allow for more control over depth behaviors, `Glob` can expose depth variance, which can then be used to construct a `DepthBehavior`. This is not implemented by this change. 
--- src/lib.rs | 27 +- src/token/variance/bound.rs | 3 - src/token/variance/invariant/natural.rs | 15 +- src/walk/glob.rs | 314 +++++++-------- src/walk/mod.rs | 515 ++++++++++++++++++------ 5 files changed, 553 insertions(+), 321 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5796ad8..2f5dd88 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -66,6 +66,7 @@ pub mod prelude { use miette::Diagnostic; use regex::Regex; use std::borrow::{Borrow, Cow}; +use std::cmp::Ordering; use std::convert::Infallible; use std::ffi::OsStr; use std::fmt::{self, Debug, Display, Formatter}; @@ -601,13 +602,6 @@ pub struct Glob<'t> { } impl<'t> Glob<'t> { - fn compile(tree: impl Borrow) -> Result - where - T: ConcatenationTree<'t>, - { - encode::compile(tree) - } - // TODO: Document pattern syntax in the crate documentation and refer to it here. /// Constructs a [`Glob`] from a glob expression. /// @@ -791,6 +785,13 @@ impl<'t> Glob<'t> { pub fn is_empty(&self) -> bool { self.tree.as_ref().as_token().is_empty() } + + fn compile(tree: impl Borrow) -> Result + where + T: ConcatenationTree<'t>, + { + encode::compile(tree) + } } impl Display for Glob<'_> { @@ -1063,6 +1064,18 @@ fn parse_and_check( Ok(checked) } +fn minmax(lhs: T, rhs: T) -> [T; 2] +where + T: Ord, +{ + use Ordering::{Equal, Greater, Less}; + + match lhs.cmp(&rhs) { + Equal | Less => [lhs, rhs], + Greater => [rhs, lhs], + } +} + #[cfg(test)] pub mod harness { use expect_macro::expect; diff --git a/src/token/variance/bound.rs b/src/token/variance/bound.rs index 663530f..3ba2e2a 100644 --- a/src/token/variance/bound.rs +++ b/src/token/variance/bound.rs @@ -211,9 +211,6 @@ impl From for NaturalRange { } } -// NOTE: Given the naturals X and Y where X < Y, this defines an unconventional meaning for the -// range [Y,X] and repetitions like `<_:10,1>`: the bounds are reordered, so `<_:10,1>` and -// `<_:1,10>` are the same. 
impl From<(usize, T)> for NaturalRange where T: Into>, diff --git a/src/token/variance/invariant/natural.rs b/src/token/variance/invariant/natural.rs index a8c3b05..a5d4bf9 100644 --- a/src/token/variance/invariant/natural.rs +++ b/src/token/variance/invariant/natural.rs @@ -1,4 +1,3 @@ -use std::cmp::Ordering; use std::num::NonZeroUsize; use crate::token::variance::bound::{ @@ -80,7 +79,7 @@ macro_rules! impl_invariant_natural { } fn bound(lhs: Self, rhs: Self) -> Boundedness { - let (lower, upper) = self::minmax(lhs, rhs); + let [lower, upper] = crate::minmax(lhs, rhs); BoundedVariantRange::try_from_lower_and_upper(lower.0, upper.0) .map_or(Unbounded, Bounded) } @@ -144,15 +143,3 @@ impl GlobVariance { !self.has_upper_bound() } } - -fn minmax(lhs: T, rhs: T) -> (T, T) -where - T: Ord, -{ - use Ordering::{Equal, Greater, Less}; - - match lhs.cmp(&rhs) { - Equal | Less => (lhs, rhs), - Greater => (rhs, lhs), - } -} diff --git a/src/walk/glob.rs b/src/walk/glob.rs index b668146..9c6eae5 100644 --- a/src/walk/glob.rs +++ b/src/walk/glob.rs @@ -26,19 +26,18 @@ impl<'t> Glob<'t> { /// directory trees. /// /// As with [`Path::join`] and [`PathBuf::push`], the base directory can be escaped or - /// overridden by rooted `Glob`s. In many cases, the current working directory `.` is an - /// appropriate base directory and will be intuitively ignored if the `Glob` is rooted, such as - /// in `/mnt/media/**/*.mp4`. The [`has_root`] function can be used to check if a `Glob` is - /// rooted. - /// - /// The root directory is either the given directory or, if rooted, the [invariant - /// prefix][`Glob::partition`] of the `Glob`. Either way, this function joins the given - /// directory with any invariant prefix to potentially begin the walk as far down the tree as - /// possible. 
**The prefix and any [semantic literals][`Glob::has_semantic_literals`] in this - /// prefix are interpreted semantically as a path**, so components like `.` and `..` that - /// precede variant patterns interact with the base directory semantically. This means that - /// expressions like `../**` escape the base directory as expected on Unix and Windows, for - /// example. To query the root directory of the walk, see [`Glob::walker`]. + /// overridden by [a `Glob` that has a root][`has_root`]. In many cases, the current working + /// directory `.` is an appropriate base directory and will be intuitively ignored if the + /// `Glob` is rooted, such as in `/mnt/media/**/*.mp4`. + /// + /// The [root path segment][`Entry::root_relative_paths`] is either the given directory or, if + /// the `Glob` has a root, the [invariant prefix][`Glob::partition`] of the `Glob`. Either way, + /// this function joins the given directory with any invariant prefix in the `Glob` to + /// potentially begin the walk as far down the tree as possible. **The prefix and any [semantic + /// literals][`Glob::has_semantic_literals`] in this prefix are interpreted semantically as a + /// path**, so components like `.` and `..` that precede variant patterns interact with the + /// base directory semantically. This means that expressions like `../**` escape the base + /// directory as expected on Unix and Windows, for example. /// /// This function uses the default [`WalkBehavior`]. To configure the behavior of the /// traversal, see [`Glob::walk_with_behavior`]. @@ -60,9 +59,9 @@ impl<'t> Glob<'t> { /// ``` /// /// Glob expressions do not support general negations, but the [`not`] combinator can be used - /// when walking a directory tree to filter entries using patterns. 
**This should generally be - /// preferred over functions like [`Iterator::filter`], because it avoids unnecessary reads of - /// directory trees when matching [exhaustive negations][`Program::is_exhaustive`].** + /// when walking a directory tree to filter entries using patterns. **Prefer this over + /// functions like [`Iterator::filter`], because it avoids unnecessary reads of directory trees + /// when matching [exhaustive negations][`Program::is_exhaustive`].** /// /// ```rust,no_run /// use wax::walk::{Entry, FileIterator}; @@ -80,8 +79,8 @@ impl<'t> Glob<'t> { /// ``` /// /// [`Any`]: crate::Any + /// [`Entry::root_relative_paths`]: crate::walk::Entry::root_relative_paths /// [`Glob::walk_with_behavior`]: crate::Glob::walk_with_behavior - /// [`Glob::walker`]: crate::Glob::walker /// [`GlobEntry`]: crate::walk::GlobEntry /// [`has_root`]: crate::Glob::has_root /// [`FileIterator`]: crate::walk::FileIterator @@ -92,21 +91,17 @@ impl<'t> Glob<'t> { /// [`Program`]: crate::Program /// [`Program::is_exhaustive`]: crate::Program::is_exhaustive /// [`WalkBehavior`]: crate::walk::WalkBehavior - pub fn walk( - &self, - directory: impl Into, - ) -> impl 'static + FileIterator { - self.walk_with_behavior(directory, WalkBehavior::default()) + pub fn walk(&self, path: impl Into) -> impl 'static + FileIterator { + self.walk_with_behavior(path, WalkBehavior::default()) } /// Gets an iterator over matching files in a directory tree. /// /// This function is the same as [`Glob::walk`], but it additionally accepts a [`WalkBehavior`] - /// that configures how the traversal interacts with symbolic links, the maximum depth from the - /// root, etc. + /// that configures how the traversal interacts with symbolic links, bounds on depth, etc. /// - /// Depth is relative to the root directory of the traversal, which is determined by joining - /// the given path and any [invariant prefix][`Glob::partition`] of the `Glob`. 
+ /// Depth is bounded relative to [the root path segment][`Entry::root_relative_paths`] + /// of the traversal. /// /// See [`Glob::walk`] for more information. /// @@ -138,52 +133,45 @@ impl<'t> Glob<'t> { /// } /// ``` /// - /// [`Glob::partition`]: crate::Glob::partition + /// [`Entry::root_relative_paths`]: crate::walk::Entry::root_relative_paths /// [`Glob::walk`]: crate::Glob::walk /// [`LinkBehavior`]: crate::walk::LinkBehavior /// [`WalkBehavior`]: crate::walk::WalkBehavior pub fn walk_with_behavior( &self, - directory: impl Into, + path: impl Into, behavior: impl Into, ) -> impl 'static + FileIterator { - self.walker(directory).walk_with_behavior(behavior) - } - - /// Gets an iterator builder over matching files in a directory tree. - /// - /// This function gets an intermediate walker that describes iteration over matching files and - /// provides paths prior to iteration. In particular, `walker` can be used when the root - /// directory of the walk is needed. **The root directory may differ from the directory passed - /// to walking functions.** - /// - /// See [`Glob::walk`]. - /// - /// # Examples - /// - /// ```rust,no_run - /// use wax::walk::Entry; - /// use wax::Glob; - /// - /// let glob = Glob::new("**/*.{log,txt}").unwrap(); - /// let walker = glob.walker("/var/log"); - /// let root = walker.root_prefix_paths().0.to_path_buf(); - /// for entry in walker.walk() { - /// let entry = entry.unwrap(); - /// println!("Log: {:?}", entry.path()); - /// } - /// ``` - /// - /// [`Glob::walk`]: crate::Glob::walk - pub fn walker(&self, directory: impl Into) -> GlobWalker { GlobWalker { - anchor: self.anchor(directory), - program: WalkProgram::from_glob(self), + anchor: self.anchor(path), + program: WalkProgram { + complete: self.program.clone(), + // Do not compile component programs for empty globs. + // + // An empty glob consists solely of an empty literal token and only matches empty + // text (""). 
A walk program compiled from such a glob has an empty component + // pattern and matches nothing. This means that walking an empty glob never yields + // any paths. At first blush, this seems consistent with an empty glob. However, + // walking conceptually matches a glob against the sub-trees in a path and there is + // arguably an implicit empty tree. This is also more composable when partitioning + // and (re)building paths. + // + // The result is that matching an empty glob against the path `foo` yields `foo` + // and only `foo` (assuming that the path exists). + components: if self.is_empty() { + vec![] + } + else { + WalkProgram::compile::>(self.tree.as_ref()) + .expect("failed to compile walk program") + }, + }, } + .walk_with_behavior(behavior) } - fn anchor(&self, directory: impl Into) -> Anchor { - let directory = directory.into(); + fn anchor(&self, path: impl Into) -> Anchor { + let path = path.into(); let prefix: Option = { let (_, prefix) = self.tree.as_ref().as_token().invariant_text_prefix(); if prefix.is_empty() { @@ -193,44 +181,50 @@ impl<'t> Glob<'t> { Some(prefix.into()) } }; - // Establish the root directory and any prefix in that root path that is not a part of the - // glob expression. The directory tree is traversed from `root`, which may include an - // invariant prefix from the glob. The `prefix` is an integer that specifies how many - // components from the end of the root path must be popped to get the portion of the root - // path that is not present in the glob. The prefix may be empty or may be the entirety of - // `root` depending on `directory` and the glob. + // Establish the root path and any pivot in that root path from the given directory and any + // invariant prefix in the glob. The file system is traversed from this root path. 
The + // pivot partitions the root path into the given directory and any invariant prefix by + // specifying how many components from the end of the root path must be popped to restore + // the given directory. The popped components form the invariant prefix of the glob. Either + // partition of the root path may be empty depending on the given directory and the glob + // pattern. In this way, any invariant prefix of the glob becomes a postfix in the root + // path. // - // Note that a rooted glob, like in `Path::join`, replaces `directory` when establishing - // the root path. In this case, there is no prefix, as the entire root path is present in - // the glob expression. - let (root, prefix) = match prefix { - Some(prefix) => directory.join_and_get_depth(prefix), - _ => (directory, 0), + // Note that a rooted glob, like in `Path::join`, replaces the given directory when + // establishing the root path. In this case, there is no invariant prefix (the pivot is + // zero), as the entire root path is present in the glob expression and the given directory + // is completely discarded. + let (root, pivot) = match prefix { + Some(prefix) => path.join_and_get_depth(prefix), + _ => (path, 0), }; - Anchor { root, prefix } + Anchor { root, pivot } } } -/// Root path and prefix of a `Glob` when walking a particular path. +/// Root path and pivot of a `Glob` when walking a particular target path. +/// +/// For unrooted globs, the pivot can be used to isolate the target path given to walk functions +/// like `Glob::walk`. This is necessary to implement `Entry` and for interpreting depth behavior, +/// which is always relative to the target path (and ignores any invariant prefix in a glob). #[derive(Clone, Debug)] struct Anchor { - /// The root (starting) directory of the walk. + /// The root path of the walk. + /// + /// This root, unlike in `PathExt::walk`, may include an invariant prefix from a glob. root: PathBuf, - // TODO: Is there a better name for this? 
This is a prefix w.r.t. a glob but is a suffix w.r.t. - // the root directory. This can be a bit confusing since either perspective is reasonable - // (and in some contexts one may be more intuitive than the other). - /// The number of components from the end of `root` that are present in the `Glob`'s - /// expression. - prefix: usize, + /// The number of components from the end of `root` that are present in any invariant prefix of + /// the glob expression. + /// + /// The pivot partitions the root path into the target path and any invariant prefix in the + /// `Glob` (this prefix becomes a postfix in the root path or, when rooted, replaces any target + /// path). + pivot: usize, } impl Anchor { - pub fn root_prefix_paths(&self) -> (&Path, &Path) { - self.root.split_at_depth(self.prefix) - } - pub fn walk_with_behavior(self, behavior: impl Into) -> WalkTree { - WalkTree::with_prefix_and_behavior(self.root, self.prefix, behavior) + WalkTree::with_pivot_and_behavior(self.root, self.pivot, behavior) } } @@ -256,84 +250,16 @@ impl WalkProgram { } Ok(regexes) } - - fn from_glob(glob: &Glob<'_>) -> Self { - WalkProgram { - complete: glob.program.clone(), - // Do not compile component programs for empty globs. - // - // An empty glob consists solely of an empty literal token and only matches empty text - // (""). A walk program compiled from such a glob has an empty component pattern and - // matches nothing. This means that walking an empty glob never yields any paths. At - // first blush, this seems consistent with an empty glob. However, walking conceptually - // matches a glob against the subtrees in a path and there is arguably an implicit - // empty tree. This is also more composable when partitioning and (re)building paths. - // - // The result is that matching an empty glob against the path `foo` yields `foo` and - // only `foo` (assuming that the path exists). 
- components: if glob.is_empty() { - vec![] - } - else { - WalkProgram::compile::>(glob.tree.as_ref()) - .expect("failed to compile glob component expressions") - }, - } - } } /// Describes iteration over matching files in a directory tree. -/// -/// A walker provides the paths walked by a [`Glob`] prior to iteration, most notably the [root -/// path][`GlobWalker::root_prefix_paths`], which may differ from the directory passed to walking -/// functions. When ready, it can be converted into an iterator over matching files. -/// -/// See [`Glob::walker`]. -/// -/// [`Glob`]: crate::Glob -/// [`Glob::walker`]: crate::Glob::walker -/// [`GlobWalker::root_prefix_paths`]: crate::walk::GlobWalker::root_prefix_paths #[derive(Clone, Debug)] -pub struct GlobWalker { +struct GlobWalker { anchor: Anchor, program: WalkProgram, } impl GlobWalker { - /// Gets the root and prefix paths. - /// - /// The root path is the path to the walked directory tree. **This path may differ from the - /// directory passed to walking functions like [`Glob::walk`]**, because it may incorporate an - /// invariant path prefix from the glob expression. - /// - /// The prefix path is the invariant path prefix of the glob expression. This path may be empty - /// and is always a suffix of the root path. - /// - /// The following table describes some example paths when using [`Glob::walk`]. - /// - /// | Glob Expression | Directory | Root | Prefix | - /// |---------------------------|--------------|--------------|------------| - /// | `**/*.txt` | `/home/user` | `/home/user` | | - /// | `projects/**/src/**/*.rs` | `.` | `./projects` | `projects` | - /// | `/var/log/**/*.log` | `.` | `/var/log` | `/var/log` | - /// - /// See also [`Entry::root_relative_paths`]. 
- /// - /// [`Entry::root_relative_paths`]: crate::walk::Entry::root_relative_paths - /// [`Glob::walk`]: crate::Glob::walk - pub fn root_prefix_paths(&self) -> (&Path, &Path) { - self.anchor.root_prefix_paths() - } - - /// Converts a walker into an iterator over matching files in its directory tree. - /// - /// See [`Glob::walk`]. - /// - /// [`Glob::walk`]: crate::Glob::walk - pub fn walk(self) -> impl 'static + FileIterator { - self.walk_with_behavior(WalkBehavior::default()) - } - /// Converts a walker into an iterator over matching files in its directory tree. /// /// See [`Glob::walk_with_behavior`]. @@ -343,6 +269,7 @@ impl GlobWalker { self, behavior: impl Into, ) -> impl 'static + FileIterator { + let pivot = self.anchor.pivot; self.anchor .walk_with_behavior(behavior) .filter_map_tree(move |cancellation, separation| { @@ -360,7 +287,7 @@ impl GlobWalker { _ => unreachable!(), }; let entry = filtrate.as_ref(); - let (_, path) = entry.root_relative_paths(); + let (_, path) = self::root_relative_paths(entry.path(), entry.depth(), pivot); let depth = entry.depth().saturating_sub(1); for (position, candidate) in path .components() @@ -391,7 +318,13 @@ impl GlobWalker { .map(MatchedText::into_owned) { filtrate - .map(|entry| Ok(GlobEntry { entry, matched })) + .map(|entry| { + Ok(GlobEntry { + entry, + pivot, + matched, + }) + }) .into() } else { @@ -414,7 +347,13 @@ impl GlobWalker { .map(MatchedText::into_owned) { filtrate - .map(|entry| Ok(GlobEntry { entry, matched })) + .map(|entry| { + Ok(GlobEntry { + entry, + pivot, + matched, + }) + }) .into() } else { @@ -437,7 +376,13 @@ impl GlobWalker { .map(MatchedText::into_owned) { return filtrate - .map(|entry| Ok(GlobEntry { entry, matched })) + .map(|entry| { + Ok(GlobEntry { + entry, + pivot, + matched, + }) + }) .into(); } filtrate.filter_node().into() @@ -461,22 +406,7 @@ enum FilterAnyProgram { } impl FilterAnyProgram { - fn compile<'t, I>(tokens: I) -> Result, BuildError> - where - I: IntoIterator, - 
I::Item: Pattern<'t>, - I::IntoIter: ExactSizeIterator, - { - let tokens = tokens.into_iter(); - if 0 == tokens.len() { - Ok(None) - } - else { - crate::any(tokens).map(|any| Some(any.program)) - } - } - - fn try_from_partitions<'t, I>(exhaustive: I, nonexhaustive: I) -> Result + pub fn try_from_partitions<'t, I>(exhaustive: I, nonexhaustive: I) -> Result where I: IntoIterator, I::Item: Pattern<'t>, @@ -520,6 +450,21 @@ impl FilterAnyProgram { _ => None, } } + + fn compile<'t, I>(tokens: I) -> Result, BuildError> + where + I: IntoIterator, + I::Item: Pattern<'t>, + I::IntoIter: ExactSizeIterator, + { + let tokens = tokens.into_iter(); + if 0 == tokens.len() { + Ok(None) + } + else { + crate::any(tokens).map(|any| Some(any.program)) + } + } } /// Negated glob combinator that efficiently filters file entries against patterns. @@ -584,6 +529,7 @@ impl FilterAny { #[derive(Debug)] pub struct GlobEntry { entry: TreeEntry, + pivot: usize, matched: MatchedText<'static>, } @@ -618,7 +564,7 @@ impl Entry for GlobEntry { } fn root_relative_paths(&self) -> (&Path, &Path) { - self.entry.root_relative_paths() + self::root_relative_paths(self.path(), self.entry.depth(), self.pivot) } fn file_type(&self) -> FileType { @@ -629,9 +575,11 @@ impl Entry for GlobEntry { self.entry.metadata().map_err(WalkError::from) } - // TODO: This needs some work and requires some explanation when applied to globs. 
fn depth(&self) -> usize { - self.entry.depth() + self.entry + .depth() + .checked_add(self.pivot) + .expect("overflow determining depth") } } @@ -640,3 +588,11 @@ impl From for TreeEntry { entry.entry } } + +fn root_relative_paths(path: &Path, depth: usize, pivot: usize) -> (&Path, &Path) { + path.split_at_depth( + depth + .checked_add(pivot) + .expect("overflow determining root and relative paths"), + ) +} diff --git a/src/walk/mod.rs b/src/walk/mod.rs index d668c77..cee8cba 100644 --- a/src/walk/mod.rs +++ b/src/walk/mod.rs @@ -69,6 +69,7 @@ mod glob; use std::fs::{FileType, Metadata}; use std::io; +use std::num::NonZeroUsize; use std::path::{Path, PathBuf}; use thiserror::Error; use walkdir::{self, DirEntry, WalkDir}; @@ -80,7 +81,7 @@ use crate::filter::{ use crate::walk::glob::FilterAny; use crate::{BuildError, Pattern}; -pub use crate::walk::glob::{GlobEntry, GlobWalker}; +pub use crate::walk::glob::GlobEntry; type FileFiltrate = Result; type FileResidue = TreeResidue; @@ -166,6 +167,11 @@ impl WalkError { } /// Gets the depth at which the error occurred from the root directory of the traversal. + /// + /// **This depth may differ from the depth reported by [`Entry::depth`]** when matching a pattern + /// against a directory tree. + /// + /// [`Entry::depth`]: crate::walk::Entry::depth pub fn depth(&self) -> usize { self.depth } @@ -262,8 +268,8 @@ pub trait PathExt { /// Gets an iterator over files in the directory tree at the path. /// /// This function is the same as [`PathExt::walk`], but it additionally accepts a - /// [`WalkBehavior`] that configures how the traversal interacts with symbolic links, the - /// maximum depth from the root, etc. + /// [`WalkBehavior`] that configures how the traversal interacts with symbolic links, bounds on + /// depth, etc. /// /// # Examples /// @@ -292,10 +298,15 @@ impl PathExt for Path { /// Configuration for interpreting symbolic links. 
/// /// Determines how symbolic links are interpreted when walking directory trees using functions like -/// [`Glob::walk_with_behavior`]. **By default, symbolic links are read as regular files and their -/// targets are ignored.** +/// [`Glob::walk_with_behavior`]. +/// +/// # Defaults +/// +/// The default link behavior is [`ReadFile`] (links are read as regular files and their targets +/// are ignored). /// /// [`Glob::walk_with_behavior`]: crate::Glob::walk_with_behavior +/// [`ReadFile`]: crate::walk::LinkBehavior::ReadFile #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub enum LinkBehavior { /// Read the symbolic link file itself. @@ -311,18 +322,259 @@ pub enum LinkBehavior { /// of the link file and its metadata describes the target. If the target is a directory, then /// traversal follows the link and descend into the target. /// - /// If a link is reentrant and forms a cycle, then an error will be emitted instead of an entry - /// and traversal does not follow the link. + /// If a link is re-entrant and forms a cycle, then an error will be emitted instead of an + /// entry and traversal does not follow the link. ReadTarget, } +/// Configuration for a minimum depth of matched files in a walk. +/// +/// Unlike a maximum depth, a minimum depth cannot be zero, because such a minimum has no effect. +/// To configure a minimum depth or else an unbounded depth, use +/// [`DepthMin::from_min_or_unbounded`]. +/// +/// [`DepthMin::from_min_or_unbounded`]: crate::walk::DepthMin::from_min_or_unbounded +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct DepthMin(pub NonZeroUsize); + +impl DepthMin { + /// Constructs a [`DepthBehavior`] with a minimum depth or, if zero, unbounded. + /// + /// # Examples + /// + /// The following example places a minimum bound on the depth of a walk. 
+ /// + /// ```rust,no_run + /// use wax::walk::DepthMin; + /// use wax::Glob; + /// + /// for entry in Glob::new("**") + /// .unwrap() + /// .walk_with_behavior(".", DepthMin::from_min_or_unbounded(1)) + /// { + /// let entry = entry.unwrap(); + /// // ... + /// } + /// ``` + /// + /// [`DepthBehavior`]: crate::walk::DepthBehavior + pub fn from_min_or_unbounded(min: usize) -> DepthBehavior { + use DepthBehavior::{Min, Unbounded}; + + DepthMin::try_from(min).map(Min).unwrap_or(Unbounded) + } + + fn min_at_pivot(self, pivot: usize) -> usize { + self.0.get().saturating_sub(pivot) + } +} + +impl From for DepthMin { + fn from(min: NonZeroUsize) -> Self { + DepthMin(min) + } +} + +impl From for NonZeroUsize { + fn from(min: DepthMin) -> Self { + min.0 + } +} + +impl TryFrom for DepthMin { + type Error = (); + + fn try_from(min: usize) -> Result { + NonZeroUsize::new(min).map(DepthMin).ok_or(()) + } +} + +/// Configuration for a maximum depth of a walk. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct DepthMax(pub usize); + +impl DepthMax { + fn max_at_pivot(self, pivot: usize) -> usize { + self.0.saturating_sub(pivot) + } +} + +impl From for DepthMax { + fn from(max: usize) -> Self { + DepthMax(max) + } +} + +impl From for usize { + fn from(max: DepthMax) -> Self { + max.0 + } +} + +/// Configuration for minimum and maximum depths of a walk and matched files. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct DepthMinMax { + pub min: NonZeroUsize, + pub extent: usize, +} + +impl DepthMinMax { + /// Constructs a [`DepthBehavior`] with a maximum depth and, if nonzero, a minimum depth. + /// + /// The depths need not be ordered. + /// + /// # Examples + /// + /// The following example places both a minimum and maximum bound on the depth of a walk. 
+ /// + /// ```rust,no_run + /// use wax::walk::DepthMinMax; + /// use wax::Glob; + /// + /// for entry in Glob::new("**") + /// .unwrap() + /// .walk_with_behavior(".", DepthMinMax::from_depths_or_max(1, 2)) + /// { + /// let entry = entry.unwrap(); + /// // ... + /// } + /// ``` + /// + /// [`DepthBehavior`]: crate::walk::DepthBehavior + pub fn from_depths_or_max(p: usize, q: usize) -> DepthBehavior { + use DepthBehavior::{Max, MinMax}; + + let [min, max] = crate::minmax(p, q); + let extent = max - min; + NonZeroUsize::new(min) + .map(|min| DepthMinMax { min, extent }) + .map_or_else(|| Max(DepthMax(max)), MinMax) + } + + fn min_max_at_pivot(self, pivot: usize) -> (usize, usize) { + ( + self.min.get().saturating_sub(pivot), + self.max().get().saturating_sub(pivot), + ) + } + + pub fn max(&self) -> NonZeroUsize { + self.min.saturating_add(self.extent) + } +} + +/// Configuration for filtering walks and files by depth. +/// +/// Determines the minimum and maximum depths of a walk and files yielded by that walk relative to +/// the [root path segment][`Entry::root_relative_paths`]. A minimum depth only filters files, but +/// a maximum depth also limits the depth of the walk (directories beneath the maximum are not read +/// from the file system). +/// +/// See [`WalkBehavior`]. +/// +/// # Defaults +/// +/// The default depth behavior is [`Unbounded`]. +/// +/// [`Entry::root_relative_paths`]: crate::walk::Entry::root_relative_paths +/// [`Unbounded`]: crate::walk::DepthBehavior::Unbounded +/// [`WalkBehavior`]: crate::walk::WalkBehavior +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub enum DepthBehavior { + #[default] + Unbounded, + Min(DepthMin), + Max(DepthMax), + MinMax(DepthMinMax), +} + +impl DepthBehavior { + // TODO: Provide a similar function for `Glob`s called something like + // `bounded_with_depth_variance`, which additionally accepts a depth variance and + // considers this variance when constructing the `DepthBehavior`. 
+ /// Constructs a bounded `DepthBehavior` from a minimum and/or maximum depth. + /// + /// This function provides an ergonomic way to place bounds on the depth of a walk. At least + /// one closed depth is required. A given depth is closed if `Some` and is open if `None`. Note + /// that a closed depth need not be explicitly wrapped in `Some`, because the depth parameters + /// are `impl Into>`. + /// + /// Returns `None` if both the minimum and maximum depths are both open (unbounded) or if both + /// depths are closed but are misordered (the minimum is greater than the maximum). Never + /// returns [`Unbounded`]. + /// + /// # Examples + /// + /// The following example places a maximum bound on the depth of a walk by using an open + /// minimum depth (`None`). + /// + /// ```rust,no_run + /// use wax::walk::DepthBehavior; + /// use wax::Glob; + /// + /// for entry in Glob::new("**") + /// .unwrap() + /// .walk_with_behavior(".", DepthBehavior::bounded(None, 2).unwrap()) + /// { + /// let entry = entry.unwrap(); + /// // ... + /// } + /// ``` + /// + /// [`Unbounded`]: crate::walk::DepthBehavior::Unbounded + pub fn bounded(min: impl Into>, max: impl Into>) -> Option { + use DepthBehavior::{Max, Min, MinMax}; + + match (min.into(), max.into()) { + (Some(min), None) => NonZeroUsize::new(min).map(DepthMin).map(Min), + (None, Some(max)) => Some(Max(DepthMax(max))), + (Some(min), Some(max)) if min <= max => NonZeroUsize::new(min) + .map(|min| DepthMinMax { + min, + extent: max - min.get(), + }) + .map(MinMax), + _ => None, + } + } +} + +impl From for DepthBehavior { + fn from(max: DepthMax) -> Self { + DepthBehavior::Max(max) + } +} + +impl From for DepthBehavior { + fn from(min: DepthMin) -> Self { + DepthBehavior::Min(min) + } +} + +impl From for DepthBehavior { + fn from(minmax: DepthMinMax) -> Self { + DepthBehavior::MinMax(minmax) + } +} + /// Configuration for walking directory trees. 
/// /// Determines the behavior of the traversal within a directory tree when using functions like /// [`Glob::walk_with_behavior`]. `WalkBehavior` can be constructed via conversions from types -/// representing its fields. APIs generally accept `impl Into<WalkBehavior>`, so these conversion -/// can be used implicitly. When constructed using such a conversion, `WalkBehavior` will use -/// defaults for any remaining fields. +/// representing its fields and sub-fields. APIs generally accept `impl Into<WalkBehavior>`, so +/// these conversions can be used implicitly. When constructed using such a conversion, +/// `WalkBehavior` will use defaults for any remaining fields. +/// +/// # Defaults +/// +/// By default, walk behavior has [unbounded depth][`DepthBehavior::Unbounded`] and reads links as +/// [regular files][`LinkBehavior::ReadFile`] (ignoring their targets). Fields have the following +/// values: +/// +/// | Field | Description | Value | +/// |-----------|-----------------------------------|------------------------------| +/// | [`depth`] | Bounds on depth. | [`DepthBehavior::Unbounded`] | +/// | [`link`] | Interpretation of symbolic links. | [`LinkBehavior::ReadFile`] | /// /// # Examples /// @@ -342,79 +594,66 @@ pub enum LinkBehavior { /// } /// ``` /// +/// [`depth`]: crate::walk::WalkBehavior::depth /// [`Glob::walk_with_behavior`]: crate::Glob::walk_with_behavior -#[derive(Clone, Copy, Debug, Eq, PartialEq)] +/// [`link`]: crate::walk::WalkBehavior::link +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub struct WalkBehavior { - // TODO: Consider using a dedicated type for this field. Using primitive types does not - // interact well with conversions used in `walk` APIs. For example, if another `usize` - // field is introduced, then the conversions become ambiguous and confusing. - /// Maximum depth. - /// - /// Determines the maximum depth to which a directory tree will be traversed relative to the - /// root.
A depth of zero corresponds to the root and so using such a depth will yield at most - /// one file entry that refers to the root. - /// - /// For [`PathExt::walk`], this depth is relative to the [`Path`] receiver. For [`Glob::walk`], - /// this depth is relative to the `directory` path parameter. + /// Bounds on the depth of the walk and matched files. /// - /// The default value is [`usize::MAX`]. + /// Determines the minimum and maximum depths of a walk and matched files relative to the [root + /// path segment][`Entry::root_relative_paths`]. The default value is + /// [`DepthBehavior::Unbounded`]. /// - /// [`Glob::walk`]: crate::Glob::walk - /// [`Path`]: std::path::Path - /// [`PathExt::walk`]: crate::walk::PathExt::walk - /// [`usize::MAX`]: usize::MAX - pub depth: usize, + /// [`DepthBehavior::Unbounded`]: crate::walk::DepthBehavior::Unbounded + /// [`Entry::root_relative_paths`]: crate::walk::Entry::root_relative_paths + pub depth: DepthBehavior, /// Interpretation of symbolic links. /// - /// Determines how symbolic links are interpreted when walking a directory tree. See - /// [`LinkBehavior`]. + /// Determines how symbolic links are interpreted when walking a directory tree. The default + /// value is [`LinkBehavior::ReadFile`]. /// - /// The default value is [`LinkBehavior::ReadFile`]. - /// - /// [`LinkBehavior`]: crate::walk::LinkBehavior /// [`LinkBehavior::ReadFile`]: crate::walk::LinkBehavior::ReadFile pub link: LinkBehavior, } -/// Constructs a `WalkBehavior` using the following defaults: -/// -/// | Field | Description | Value | -/// |-----------|-----------------------------------|----------------------------| -/// | [`depth`] | Maximum depth. | [`usize::MAX`] | -/// | [`link`] | Interpretation of symbolic links. 
| [`LinkBehavior::ReadFile`] | -/// -/// [`depth`]: crate::walk::WalkBehavior::depth -/// [`link`]: crate::walk::WalkBehavior::link -/// [`LinkBehavior::ReadFile`]: crate::walk::LinkBehavior::ReadFile -/// [`usize::MAX`]: usize::MAX -impl Default for WalkBehavior { - fn default() -> Self { - WalkBehavior { - depth: usize::MAX, - link: LinkBehavior::default(), - } - } -} - impl From<()> for WalkBehavior { fn from(_: ()) -> Self { Default::default() } } -impl From for WalkBehavior { - fn from(link: LinkBehavior) -> Self { +impl From for WalkBehavior { + fn from(depth: DepthBehavior) -> Self { WalkBehavior { - link, + depth, ..Default::default() } } } -impl From for WalkBehavior { - fn from(depth: usize) -> Self { +impl From for WalkBehavior { + fn from(max: DepthMax) -> Self { + DepthBehavior::from(max).into() + } +} + +impl From for WalkBehavior { + fn from(min: DepthMin) -> Self { + DepthBehavior::from(min).into() + } +} + +impl From for WalkBehavior { + fn from(minmax: DepthMinMax) -> Self { + DepthBehavior::from(minmax).into() + } +} + +impl From for WalkBehavior { + fn from(link: LinkBehavior) -> Self { WalkBehavior { - depth, + link, ..Default::default() } } @@ -424,7 +663,7 @@ impl From for WalkBehavior { /// /// [`FileIterator`]: crate::walk::FileIterator pub trait Entry { - /// Converts the entry into its file path. + /// Converts the entry into the path of the file. fn into_path(self) -> PathBuf where Self: Sized; @@ -432,48 +671,43 @@ pub trait Entry { /// Gets the path of the file. fn path(&self) -> &Path; - /// Gets the root and relative paths. - /// - /// The root path is the path to the walked directory from which the file entry has been read. - /// The relative path is the remainder of the file path of the entry (the path relative to the - /// root directory). Both the root and relative paths may be empty. + /// Splits the path of the file into its root and relative segments, in that order. 
/// - /// The root and relative paths can differ significantly depending on the way a directory is - /// walked, in particular when using a [`Glob`]. The following table describes some example - /// paths when using [`Glob::walk`]. + /// The root segment is the path from which the walk started. When walking a [`Path`] via + /// functions in [`PathExt`], the root is always the same as the path itself. When walking a + /// pattern like [`Glob`], the root segment differs depending on whether or not the pattern + /// [has a root][`Glob::has_root`]. If a pattern has a root, then the root segment is the + /// invariant prefix in the pattern, otherwise the root segment is the path given to functions + /// like [`Glob::walk`]. /// - /// | Glob Expression | Directory | Entry Path | Root | Relative | - /// |---------------------------|--------------|------------------------------------|--------------|----------------------------------| - /// | `**/*.txt` | `/home/user` | `/home/user/notes.txt` | `/home/user` | `notes.txt` | - /// | `projects/**/src/**/*.rs` | `.` | `./projects/fibonacci/src/main.rs` | `.` | `projects/fibonacci/src/main.rs` | - /// | `/var/log/**/*.log` | `.` | `/var/log/pacman.log` | | `/var/log/pacman.log` | - /// - /// See also [`GlobWalker::root_prefix_paths`]. + /// The relative segment is the remainder (descendant) of the path of the file (relative to the + /// root segment). /// /// [`Glob`]: crate::Glob + /// [`Glob::has_root`]: crate::Glob::has_root /// [`Glob::walk`]: crate::Glob::walk - /// [`GlobWalker::root_prefix_paths`]: crate::walk::GlobWalker::root_prefix_paths + /// [`Path`]: std::path::Path + /// [`PathExt`]: crate::walk::PathExt fn root_relative_paths(&self) -> (&Path, &Path); /// Gets the [`Metadata`] of the file. /// - /// On some platforms, this requires an additional read from the file system. + /// This may require an additional read from the file system on some platforms. 
/// /// [`Metadata`]: std::fs::Metadata fn metadata(&self) -> Result; /// Gets the type of the file (regular vs. directory). /// - /// Prefer this function over [`metadata`] if only the file type is needed, as this information - /// is cached. + /// This information may be cached and so this function should be preferred over [`metadata`] + /// if only the file type is needed. /// /// [`metadata`]: crate::walk::Entry::metadata fn file_type(&self) -> FileType; - /// Gets the depth of the file path from the root. + /// Gets the depth of the path of the file from the root segment. /// - /// The root path is the path to the walked directory from which the file entry has been read. - /// Use [`root_relative_paths`] to get the root path. + /// See [`root_relative_paths`]. /// /// [`root_relative_paths`]: crate::walk::Entry::root_relative_paths fn depth(&self) -> usize; @@ -485,7 +719,6 @@ pub trait Entry { #[derive(Clone, Debug)] pub struct TreeEntry { entry: DirEntry, - prefix: usize, } impl Entry for TreeEntry { @@ -498,11 +731,7 @@ impl Entry for TreeEntry { } fn root_relative_paths(&self) -> (&Path, &Path) { - self.path().split_at_depth( - self.depth() - .checked_add(self.prefix) - .expect("overflow determining root-relative paths"), - ) + self.path().split_at_depth(self.depth()) } fn metadata(&self) -> Result { @@ -541,34 +770,38 @@ impl Entry for TreeEntry { /// [`PathExt::walk`]: crate::walk::PathExt::walk #[derive(Debug)] pub struct WalkTree { - prefix: usize, is_dir: bool, input: walkdir::IntoIter, } impl WalkTree { fn with_behavior(root: impl Into, behavior: impl Into) -> Self { - WalkTree::with_prefix_and_behavior(root, 0, behavior) + WalkTree::with_pivot_and_behavior(root, 0, behavior) } - fn with_prefix_and_behavior( + fn with_pivot_and_behavior( root: impl Into, - prefix: usize, + pivot: usize, behavior: impl Into, ) -> Self { let root = root.into(); let WalkBehavior { link, depth } = behavior.into(); - let builder = WalkDir::new(root.as_path()); + let 
builder = WalkDir::new(root.as_path()).follow_links(match link { + LinkBehavior::ReadFile => false, + LinkBehavior::ReadTarget => true, + }); + let builder = match depth { + DepthBehavior::Max(max) => builder.max_depth(max.max_at_pivot(pivot)), + DepthBehavior::Min(min) => builder.min_depth(min.min_at_pivot(pivot)), + DepthBehavior::MinMax(minmax) => { + let (min, max) = minmax.min_max_at_pivot(pivot); + builder.min_depth(min).max_depth(max) + }, + DepthBehavior::Unbounded => builder, + }; WalkTree { - prefix, is_dir: false, - input: builder - .follow_links(match link { - LinkBehavior::ReadFile => false, - LinkBehavior::ReadTarget => true, - }) - .max_depth(depth) - .into_iter(), + input: builder.into_iter(), } } } @@ -590,13 +823,7 @@ impl Iterator for WalkTree { fn next(&mut self) -> Option { let (is_dir, next) = match self.input.next() { Some(result) => match result { - Ok(entry) => ( - entry.file_type().is_dir(), - Some(Ok(TreeEntry { - entry, - prefix: self.prefix, - })), - ), + Ok(entry) => (entry.file_type().is_dir(), Some(Ok(TreeEntry { entry }))), Err(error) => (false, Some(Err(error.into()))), }, _ => (false, None), @@ -1011,6 +1238,7 @@ pub mod harness { } } +// TODO: Construct `Glob`s in tests using `crate::harness::assert_new_glob_is_ok`. 
#[cfg(test)] mod tests { use build_fs_tree::{dir, file}; @@ -1019,7 +1247,7 @@ mod tests { use crate::walk::filter::{HierarchicalIterator, Separation, TreeResidue}; use crate::walk::harness::{self, assert_set_eq, TempTree}; - use crate::walk::{Entry, FileIterator, LinkBehavior, PathExt, WalkBehavior}; + use crate::walk::{DepthBehavior, Entry, FileIterator, LinkBehavior, PathExt}; use crate::Glob; // TODO: Rust's testing framework does not provide a mechanism for maintaining shared state nor @@ -1055,7 +1283,7 @@ mod tests { ) } - /// Writes a testing directory tree that includes a reentrant symbolic link to a temporary + /// Writes a testing directory tree that includes a re-entrant symbolic link to a temporary /// location on the file system. #[cfg(any(unix, windows))] #[fixture] @@ -1073,7 +1301,7 @@ mod tests { std::os::windows::fs::symlink_dir(target, link) } - // Get a temporary tree and create a reentrant symbolic link. + // Get a temporary tree and create a re-entrant symbolic link. 
let temptree = temptree(); link(&temptree, temptree.join("tests/cycle")) .expect("failed to write symbolic link in temporary tree"); @@ -1148,6 +1376,22 @@ mod tests { ); } + #[rstest] + fn walk_path_with_min_max_depth_behavior_excludes_ancestors_and_descendants( + temptree: TempTree, + ) { + harness::assert_walk_paths_eq( + temptree.walk_with_behavior(DepthBehavior::bounded(2, 2).unwrap()), + temptree.join_all([ + "doc/guide.md", + "src/glob.rs", + "src/lib.rs", + "tests/harness", + "tests/walk.rs", + ]), + ); + } + #[rstest] fn walk_glob_with_tree_includes_all_paths(temptree: TempTree) { harness::assert_walk_paths_eq( @@ -1272,17 +1516,52 @@ mod tests { #[rstest] fn walk_glob_with_max_depth_behavior_excludes_descendants(temptree: TempTree) { harness::assert_walk_paths_eq( - Glob::new("**").unwrap().walk_with_behavior( - temptree.as_ref(), - WalkBehavior { - depth: 1, - ..Default::default() - }, - ), + Glob::new("**") + .unwrap() + .walk_with_behavior(temptree.as_ref(), DepthBehavior::bounded(None, 1).unwrap()), temptree.join_all(["", "doc", "src", "tests", "README.md"]), ); } + #[rstest] + fn walk_glob_with_zero_max_depth_behavior_includes_only_root(temptree: TempTree) { + harness::assert_walk_paths_eq( + Glob::new("**") + .unwrap() + .walk_with_behavior(temptree.as_ref(), DepthBehavior::bounded(None, 0).unwrap()), + [temptree.as_ref()], + ); + } + + #[rstest] + fn walk_glob_with_min_depth_behavior_excludes_ancestors(temptree: TempTree) { + harness::assert_walk_paths_eq( + Glob::new("**") + .unwrap() + .walk_with_behavior(temptree.as_ref(), DepthBehavior::bounded(2, None).unwrap()), + temptree.join_all([ + "doc/guide.md", + "src/glob.rs", + "src/lib.rs", + "tests/harness", + "tests/harness/mod.rs", + "tests/walk.rs", + ]), + ); + } + + #[rstest] + fn walk_prefixed_glob_with_min_max_depth_behavior_excludes_ancestors_and_descendants( + temptree: TempTree, + ) { + harness::assert_walk_paths_eq( + Glob::new("tests/**") + .unwrap() + 
.walk_with_behavior(temptree.as_ref(), DepthBehavior::bounded(2, 2).unwrap()), + temptree.join_all(["tests/harness", "tests/walk.rs"]), + ); + } + #[cfg(any(unix, windows))] #[rstest] fn walk_glob_with_read_link_file_behavior_includes_link_file(