Pass tab_width to functions instead of storing it in Word

mgeisler · Oct 30, 2022 · c6b9048 · mgeisler · Nov 1, 2022 · mtoohey31
1 parent 489f3b0
commit c6b9048
Show file tree

Hide file tree

Showing 6 changed files with 59 additions and 67 deletions.
diff --git a/src/core.rs b/src/core.rs
@@ -206,7 +206,7 @@ pub trait Fragment: std::fmt::Debug {
 
     /// Displayed width of the whitespace that must follow the word
     /// when the word is not at the end of a line.
-    fn whitespace_width(&self) -> f64;
+    fn whitespace_width(&self, tab_width: u8) -> f64;
 
     /// Displayed width of the penalty that must be inserted if the
     /// word falls at the end of a line.
@@ -227,8 +227,6 @@ pub struct Word<'a> {
     pub penalty: &'a str,
     // Cached width in columns.
     pub(crate) width: usize,
-    // Width of a tab character within this word.
-    pub(crate) tab_width: u8,
 }
 
 impl std::ops::Deref for Word<'_> {
@@ -245,22 +243,14 @@ impl<'a> Word<'a> {
     /// All trailing whitespace is automatically taken to be the whitespace part
     /// of the word.
     pub fn from(word: &str) -> Word<'_> {
-        Self::with_tab_width(word, 0)
-    }
-
-    /// Construct a `Word` from a string, with a customizable tab width
-    /// to calculate the total width with.
-    ///
-    /// All trailing whitespace is automatically removed to be the
-    /// whitespace part of the word.
-    pub fn with_tab_width(word: &str, tab_width: u8) -> Word<'_> {
         let trimmed = word.trim_end();
         Word {
             word: trimmed,
-            width: display_width(trimmed, tab_width),
+            // trimmed shouldn't contain whitespace, so we don't need to pass
+            // an accurate tab_width.
+            width: display_width(trimmed, 0),
             whitespace: &word[trimmed.len()..],
             penalty: "",
-            tab_width,
         }
     }
 
@@ -273,11 +263,15 @@ impl<'a> Word<'a> {
     /// ```
     /// use textwrap::core::Word;
     /// assert_eq!(
-    ///     Word::from("Hello!  ").break_apart(3).collect::<Vec<_>>(),
+    ///     Word::from("Hello!  ").break_apart(3, 0).collect::<Vec<_>>(),
     ///     vec![Word::from("Hel"), Word::from("lo!  ")]
     /// );
     /// ```
-    pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator<Item = Word<'a>> + 'b {
+    pub fn break_apart<'b>(
+        &'b self,
+        line_width: usize,
+        tab_width: u8,
+    ) -> impl Iterator<Item = Word<'a>> + 'b {
         let mut char_indices = self.word.char_indices();
         let mut offset = 0;
         let mut width = 0;
@@ -288,20 +282,19 @@ impl<'a> Word<'a> {
                     continue;
                 }
 
-                if width > 0 && width + ch_width(ch, self.tab_width) > line_width {
+                if width > 0 && width + ch_width(ch, tab_width) > line_width {
                     let word = Word {
                         word: &self.word[offset..idx],
                         whitespace: "",
                         penalty: "",
-                        tab_width: self.tab_width,
                         width,
                     };
                     offset = idx;
-                    width = ch_width(ch, self.tab_width);
+                    width = ch_width(ch, tab_width);
                     return Some(word);
                 }
 
-                width += ch_width(ch, self.tab_width);
+                width += ch_width(ch, tab_width);
             }
 
             if offset < self.word.len() {
@@ -326,8 +319,8 @@ impl Fragment for Word<'_> {
     }
 
     #[inline]
-    fn whitespace_width(&self) -> f64 {
-        display_width(self.whitespace, self.tab_width) as f64
+    fn whitespace_width(&self, tab_width: u8) -> f64 {
+        display_width(self.whitespace, tab_width) as f64
     }
 
     // We assume the penalty is `""` or `"-"`. This allows us to
@@ -343,14 +336,14 @@ impl Fragment for Word<'_> {
 /// This simply calls [`Word::break_apart`] on words that are too
 /// wide. This means that no extra `'-'` is inserted, the word is
 /// simply broken into smaller pieces.
-pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec<Word<'a>>
+pub fn break_words<'a, I>(words: I, line_width: usize, tab_width: u8) -> Vec<Word<'a>>
 where
     I: IntoIterator<Item = Word<'a>>,
 {
     let mut shortened_words = Vec::new();
     for word in words {
         if word.width() > line_width as f64 {
-            shortened_words.extend(word.break_apart(line_width));
+            shortened_words.extend(word.break_apart(line_width, tab_width));
         } else {
             shortened_words.push(word);
         }

diff --git a/src/fill.rs b/src/fill.rs
@@ -128,12 +128,9 @@ where
     for line in text.split('\n') {
         let words = WordSeparator::AsciiSpace
             .find_words(line)
-            .map(|mut w| {
-                w.tab_width = options.tab_width;
-                w
-            })
             .collect::<Vec<_>>();
-        let wrapped_words = wrap_algorithms::wrap_first_fit(&words, &[options.width as f64]);
+        let wrapped_words =
+            wrap_algorithms::wrap_first_fit(&words, &[options.width as f64], options.tab_width);
 
         let mut line_offset = offset;
         for words in &wrapped_words[..wrapped_words.len() - 1] {

diff --git a/src/word_splitters.rs b/src/word_splitters.rs
@@ -193,10 +193,12 @@ where
                 let need_hyphen = !word[..idx].ends_with('-');
                 let w = Word {
                     word: &word.word[prev..idx],
-                    width: display_width(&word[prev..idx], word.tab_width),
+                    // word[prev..idx] is a subset of the original word.word,
+                    // which is trimmed of whitespace in Word::from, so we
+                    // don't need an accurate tab_width.
+                    width: display_width(&word[prev..idx], 0),
                     whitespace: "",
                     penalty: if need_hyphen { "-" } else { "" },
-                    tab_width: word.tab_width,
                 };
                 prev = idx;
                 return Some(w);
@@ -205,7 +207,9 @@ where
             if prev < word.word.len() || prev == 0 {
                 let w = Word {
                     word: &word.word[prev..],
-                    width: display_width(&word[prev..], word.tab_width),
+                    // see comment by display_width call above regarding
+                    // tab_width.
+                    width: display_width(&word[prev..], 0),
                     ..word
                 };
                 prev = word.word.len() + 1;
@@ -279,15 +283,13 @@ mod tests {
                     word: "foo",
                     width: 3,
                     whitespace: "",
-                    penalty: "-",
-                    tab_width: 0
+                    penalty: "-"
                 },
                 Word {
                     word: "bar",
                     width: 3,
                     whitespace: "",
-                    penalty: "",
-                    tab_width: 0
+                    penalty: ""
                 }
             ]
         );
@@ -302,15 +304,13 @@ mod tests {
                     word: "fo-",
                     width: 3,
                     whitespace: "",
-                    penalty: "",
-                    tab_width: 0
+                    penalty: ""
                 },
                 Word {
                     word: "bar",
                     width: 3,
                     whitespace: "",
-                    penalty: "",
-                    tab_width: 0
+                    penalty: ""
                 }
             ]
         );

diff --git a/src/wrap.rs b/src/wrap.rs
@@ -225,27 +225,26 @@ pub(crate) fn wrap_single_line_slow_path<'a>(
         .saturating_sub(display_width(options.subsequent_indent, options.tab_width));
     let line_widths = [initial_width, subsequent_width];
 
-    let words = options.word_separator.find_words(line).map(|mut w| {
-        w.tab_width = options.tab_width;
-        w
-    });
+    let words = options.word_separator.find_words(line);
     let split_words = split_words(words, &options.word_splitter);
     let broken_words = if options.break_words {
-        let mut broken_words = break_words(split_words, line_widths[1]);
+        let mut broken_words = break_words(split_words, line_widths[1], options.tab_width);
         if !options.initial_indent.is_empty() {
             // Without this, the first word will always go into the
             // first line. However, since we break words based on the
             // _second_ line width, it can be wrong to unconditionally
             // put the first word onto the first line. An empty
             // zero-width word fixed this.
-            broken_words.insert(0, Word::with_tab_width("", options.tab_width));
+            broken_words.insert(0, Word::from(""));
         }
         broken_words
     } else {
         split_words.collect::<Vec<_>>()
     };
 
-    let wrapped_words = options.wrap_algorithm.wrap(&broken_words, &line_widths);
+    let wrapped_words = options
+        .wrap_algorithm
+        .wrap(&broken_words, &line_widths, options.tab_width);
 
     let mut idx = 0;
     for words in wrapped_words {

diff --git a/src/wrap_algorithms.rs b/src/wrap_algorithms.rs
@@ -168,6 +168,7 @@ impl WrapAlgorithm {
         &self,
         words: &'b [Word<'a>],
         line_widths: &'b [usize],
+        tab_width: u8,
     ) -> Vec<&'b [Word<'a>]> {
         // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53
         // = 9_007_199_254_740_992 can be represented without loss by
@@ -176,13 +177,13 @@ impl WrapAlgorithm {
         let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>();
 
         match self {
-            WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths),
+            WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths, tab_width),
 
             #[cfg(feature = "smawk")]
             WrapAlgorithm::OptimalFit(penalties) => {
                 // The computation cannnot overflow when the line
                 // widths are restricted to usize.
-                wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap()
+                wrap_optimal_fit(words, &f64_line_widths, tab_width, penalties).unwrap()
             }
 
             WrapAlgorithm::Custom(func) => func(words, line_widths),
@@ -231,7 +232,7 @@ impl Default for WrapAlgorithm {
 ///
 /// let text = "These few words will unfortunately not wrap nicely.";
 /// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>();
-/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])),
+/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0], 0)),
 ///            vec!["These few words",
 ///                 "will",  // <-- short line
 ///                 "unfortunately",
@@ -242,7 +243,7 @@ impl Default for WrapAlgorithm {
 /// #[cfg(feature = "smawk")]
 /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
 /// #[cfg(feature = "smawk")]
-/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()),
+/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], 0, &Penalties::new()).unwrap()),
 ///            vec!["These few",
 ///                 "words will",
 ///                 "unfortunately",
@@ -284,7 +285,7 @@ impl Default for WrapAlgorithm {
 ///
 /// impl Fragment for Task<'_> {
 ///     fn width(&self) -> f64 { self.hours }
-///     fn whitespace_width(&self) -> f64 { self.sweep }
+///     fn whitespace_width(&self, tab_width: u8) -> f64 { self.sweep }
 ///     fn penalty_width(&self) -> f64 { self.cleanup }
 /// }
 ///
@@ -308,7 +309,7 @@ impl Default for WrapAlgorithm {
 ///     let mut days = Vec::new();
 ///     // Assign tasks to days. The assignment is a vector of slices,
 ///     // with a slice per day.
-///     let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]);
+///     let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length], 0);
 ///     for day in assigned_days.iter() {
 ///         let last = day.last().unwrap();
 ///         let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum();
@@ -347,6 +348,7 @@ impl Default for WrapAlgorithm {
 pub fn wrap_first_fit<'a, 'b, T: Fragment>(
     fragments: &'a [T],
     line_widths: &'b [f64],
+    tab_width: u8,
 ) -> Vec<&'a [T]> {
     // The final line width is used for all remaining lines.
     let default_line_width = line_widths.last().copied().unwrap_or(0.0);
@@ -364,7 +366,7 @@ pub fn wrap_first_fit<'a, 'b, T: Fragment>(
             start = idx;
             width = 0.0;
         }
-        width += fragment.width() + fragment.whitespace_width();
+        width += fragment.width() + fragment.whitespace_width(tab_width);
     }
     lines.push(&fragments[start..]);
     lines
@@ -380,7 +382,7 @@ mod tests {
     #[rustfmt::skip]
     impl Fragment for Word {
         fn width(&self) -> f64 { self.0 }
-        fn whitespace_width(&self) -> f64 { 1.0 }
+        fn whitespace_width(&self, _: u8) -> f64 { 1.0 }
         fn penalty_width(&self) -> f64 { 0.0 }
     }
 
@@ -397,7 +399,7 @@ mod tests {
         // Wrap at just under f64::MAX (~19e307). The tiny
         // whitespace_widths disappear because of loss of precision.
         assert_eq!(
-            wrap_first_fit(&words, &[15e307]),
+            wrap_first_fit(&words, &[15e307], 0),
             &[
                 vec![
                     Word(1e307),