From 37f2efb0bdd3c9bef34c29072182601e8f97e81d Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 24 Aug 2022 15:53:23 -0500 Subject: [PATCH] perf: Vendor textwrap parts we need The immediate benefit is binary size but this also makes us more flexible on the implementation, like allowing wrapping of `StyledStr`. This removed 12 KiB from `.text` This helps towards #1365 and probably #2037 --- Cargo.lock | 25 +----- Cargo.toml | 5 +- src/output/help.rs | 33 +++----- src/output/mod.rs | 1 + src/output/textwrap/mod.rs | 112 +++++++++++++++++++++++++ src/output/textwrap/word_separators.rs | 91 ++++++++++++++++++++ src/output/textwrap/wrap_algorithms.rs | 44 ++++++++++ 7 files changed, 264 insertions(+), 47 deletions(-) create mode 100644 src/output/textwrap/word_separators.rs create mode 100644 src/output/textwrap/wrap_algorithms.rs diff --git a/Cargo.lock b/Cargo.lock index 582ffe35ea4..3177895255e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -113,7 +113,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" dependencies = [ "bitflags", - "textwrap 0.11.0", + "textwrap", "unicode-width", ] @@ -133,8 +133,7 @@ dependencies = [ "static_assertions", "strsim", "termcolor", - "terminal_size 0.2.1", - "textwrap 0.15.0", + "terminal_size", "trybuild", "trycmd", "unic-emoji-char", @@ -931,16 +930,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "terminal_size" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "terminal_size" version = "0.2.1" @@ -960,16 +949,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "textwrap" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" -dependencies = [ - "terminal_size 0.1.17", - "unicode-width", -] - [[package]] name = "tinytemplate" version = "1.2.1" diff --git a/Cargo.toml b/Cargo.toml index 631a4698ce5..37e184e8fd4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,9 +71,9 @@ suggestions = ["dep:strsim"] deprecated = ["clap_derive?/deprecated"] # Guided experience to prepare for next breaking release (at different stages of development, this may become default) derive = ["clap_derive"] cargo = [] # Disable if you're not using Cargo, enables Cargo-env-var-dependent macros -wrap_help = ["dep:terminal_size", "textwrap/terminal_size"] +wrap_help = ["dep:terminal_size"] env = [] # Use environment variables during arg parsing -unicode = ["textwrap/unicode-width", "dep:unicode-width", "dep:unicase"] # Support for unicode characters in arguments and help messages +unicode = ["dep:unicode-width", "dep:unicase"] # Support for unicode characters in arguments and help messages perf = [] # Optimize for runtime performance # In-work features @@ -88,7 +88,6 @@ bench = false clap_derive = { path = "./clap_derive", version = "=4.0.0-alpha.0", optional = true } clap_lex = { path = "./clap_lex", version = "0.2.2" } bitflags = "1.2" -textwrap = { version = "0.15.0", default-features = false, features = [] } unicase = { version = "2.6", optional = true } strsim = { version = "0.10", optional = true } atty = { version = "0.2", optional = true } diff --git a/src/output/help.rs b/src/output/help.rs index 54b121c86d2..c17d184f1a4 100644 --- a/src/output/help.rs +++ b/src/output/help.rs @@ -10,6 +10,7 @@ use crate::builder::Str; use crate::builder::StyledStr; use crate::builder::{render_arg_val, Arg, Command}; use crate::output::display_width; +use crate::output::wrap; use crate::output::Usage; use crate::util::FlatSet; use crate::ArgAction; @@ -355,7 +356,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> { self.cmd.get_before_help() }; if let Some(output) = before_help { - self.none(text_wrapper( + self.none(wrap( &output.unwrap_none().replace("{n}", "\n"), self.term_w, )); @@ -374,7 +375,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> { }; if let Some(output) = after_help { self.none("\n\n"); - self.none(text_wrapper( + self.none(wrap( &output.unwrap_none().replace("{n}", "\n"), self.term_w, )); @@ -415,7 +416,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> { // Determine how many newlines we need to insert let avail_chars = self.term_w - spaces; debug!("Help::help: Usable space...{}", avail_chars); - help = text_wrapper(&help.replace("{n}", "\n"), avail_chars); + help = wrap(&help.replace("{n}", "\n"), avail_chars); } else { debug!("No"); } @@ -498,7 +499,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> { usize::MAX }; - let help = text_wrapper(help.unwrap_none(), avail_chars); + let help = wrap(help.unwrap_none(), avail_chars); let mut help = help.lines(); self.none(help.next().unwrap_or_default()); @@ -657,7 +658,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> { if before_new_line { self.none("\n"); } - self.none(text_wrapper(output.unwrap_none(), self.term_w)); + self.none(wrap(output.unwrap_none(), self.term_w)); if after_new_line { self.none("\n"); } @@ -669,7 +670,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> { if before_new_line { self.none("\n"); } - self.none(text_wrapper(author, self.term_w)); + self.none(wrap(author, self.term_w)); if after_new_line { self.none("\n"); } @@ -682,7 +683,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> { .get_version() .or_else(|| self.cmd.get_long_version()); if let Some(output) = version { - self.none(text_wrapper(output, self.term_w)); + self.none(wrap(output, self.term_w)); } } } @@ -906,7 +907,7 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> { fn write_display_name(&mut self) { debug!("Help::write_display_name"); - let display_name = text_wrapper( + let display_name = wrap( &self .cmd .get_display_name() @@ -926,10 +927,10 @@ impl<'cmd, 'writer> Help<'cmd, 'writer> { // In case we're dealing with subcommands i.e. git mv is translated to git-mv bn.replace(' ', "-") } else { - text_wrapper(&self.cmd.get_name().replace("{n}", "\n"), self.term_w) + wrap(&self.cmd.get_name().replace("{n}", "\n"), self.term_w) } } else { - text_wrapper(&self.cmd.get_name().replace("{n}", "\n"), self.term_w) + wrap(&self.cmd.get_name().replace("{n}", "\n"), self.term_w) }; self.good(&bin_name); } @@ -1071,16 +1072,6 @@ fn should_show_subcommand(subcommand: &Command) -> bool { !subcommand.is_hide_set() } -fn text_wrapper(help: &str, width: usize) -> String { - let wrapper = textwrap::Options::new(width) - .break_words(false) - .word_splitter(textwrap::WordSplitter::NoHyphenation); - help.lines() - .map(|line| textwrap::fill(line, &wrapper)) - .collect::>() - .join("\n") -} - #[cfg(test)] mod test { use super::*; @@ -1088,7 +1079,7 @@ mod test { #[test] fn wrap_help_last_word() { let help = String::from("foo bar baz"); - assert_eq!(text_wrapper(&help, 5), "foo\nbar\nbaz"); + assert_eq!(wrap(&help, 5), "foo\nbar\nbaz"); } #[test] diff --git a/src/output/mod.rs b/src/output/mod.rs index 228fc3189ff..1d7a4316a75 100644 --- a/src/output/mod.rs +++ b/src/output/mod.rs @@ -6,4 +6,5 @@ pub(crate) mod fmt; pub(crate) use self::help::Help; pub(crate) use self::textwrap::core::display_width; +pub(crate) use self::textwrap::wrap; pub(crate) use self::usage::Usage; diff --git a/src/output/textwrap/mod.rs b/src/output/textwrap/mod.rs index 90d7cc3655d..c6e831f65cf 100644 --- a/src/output/textwrap/mod.rs +++ b/src/output/textwrap/mod.rs @@ -1 +1,113 @@ +//! Fork of `textwrap` crate +//! +//! Benefits of forking: +//! - Pull in only what we need rather than relying on the compiler to remove what we don't need +//! - `LineWrapper` is able to incrementally wrap which will help with `StyledStr + pub(crate) mod core; +pub(crate) mod word_separators; +pub(crate) mod wrap_algorithms; + +pub(crate) fn wrap(content: &str, hard_width: usize) -> String { + let mut wrapper = wrap_algorithms::LineWrapper::new(hard_width); + let mut total = Vec::new(); + for line in content.split_inclusive('\n') { + wrapper.reset(); + let line = word_separators::find_words_ascii_space(line).collect::>(); + total.extend(wrapper.wrap(line)); + } + total.join("") +} + +#[cfg(test)] +mod test { + /// Compatibility shim to keep textwrap's tests + fn wrap(content: &str, hard_width: usize) -> Vec { + super::wrap(content, hard_width) + .trim_end() + .split('\n') + .map(|s| s.to_owned()) + .collect::>() + } + + #[test] + fn no_wrap() { + assert_eq!(wrap("foo", 10), vec!["foo"]); + } + + #[test] + fn wrap_simple() { + assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]); + } + + #[test] + fn to_be_or_not() { + assert_eq!( + wrap("To be, or not to be, that is the question.", 10), + vec!["To be, or", "not to be,", "that is", "the", "question."] + ); + } + + #[test] + fn multiple_words_on_first_line() { + assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]); + } + + #[test] + fn long_word() { + assert_eq!(wrap("foo", 0), vec!["foo"]); + } + + #[test] + fn long_words() { + assert_eq!(wrap("foo bar", 0), vec!["foo", "bar"]); + } + + #[test] + fn max_width() { + assert_eq!(wrap("foo bar", usize::MAX), vec!["foo bar"]); + + let text = "Hello there! This is some English text. \ + It should not be wrapped given the extents below."; + assert_eq!(wrap(text, usize::MAX), vec![text]); + } + + #[test] + fn leading_whitespace() { + assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]); + } + + #[test] + fn leading_whitespace_empty_first_line() { + // If there is no space for the first word, the first line + // will be empty. This is because the string is split into + // words like [" ", "foobar ", "baz"], which puts "foobar " on + // the second line. We never output trailing whitespace + assert_eq!(wrap(" foobar baz", 6), vec!["", "foobar", "baz"]); + } + + #[test] + fn trailing_whitespace() { + // Whitespace is only significant inside a line. After a line + // gets too long and is broken, the first word starts in + // column zero and is not indented. + assert_eq!(wrap("foo bar baz ", 5), vec!["foo", "bar", "baz"]); + } + + #[test] + fn issue_99() { + // We did not reset the in_whitespace flag correctly and did + // not handle single-character words after a line break. + assert_eq!( + wrap("aaabbbccc x yyyzzzwww", 9), + vec!["aaabbbccc", "x", "yyyzzzwww"] + ); + } + + #[test] + fn issue_129() { + // The dash is an em-dash which takes up four bytes. We used + // to panic since we tried to index into the character. + assert_eq!(wrap("x – x", 1), vec!["x", "–", "x"]); + } +} diff --git a/src/output/textwrap/word_separators.rs b/src/output/textwrap/word_separators.rs new file mode 100644 index 00000000000..ac09231d56e --- /dev/null +++ b/src/output/textwrap/word_separators.rs @@ -0,0 +1,91 @@ +pub(crate) fn find_words_ascii_space(line: &str) -> impl Iterator + '_ { + let mut start = 0; + let mut in_whitespace = false; + let mut char_indices = line.char_indices(); + + std::iter::from_fn(move || { + for (idx, ch) in char_indices.by_ref() { + if in_whitespace && ch != ' ' { + let word = &line[start..idx]; + start = idx; + in_whitespace = ch == ' '; + return Some(word); + } + + in_whitespace = ch == ' '; + } + + if start < line.len() { + let word = &line[start..]; + start = line.len(); + return Some(word); + } + + None + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + macro_rules! test_find_words { + ($ascii_name:ident, + $([ $line:expr, $ascii_words:expr ]),+) => { + #[test] + fn $ascii_name() { + $( + let expected_words: Vec<&str> = $ascii_words.to_vec(); + let actual_words = find_words_ascii_space($line) + .collect::>(); + assert_eq!(actual_words, expected_words, "Line: {:?}", $line); + )+ + } + }; + } + + test_find_words!(ascii_space_empty, ["", []]); + + test_find_words!(ascii_single_word, ["foo", ["foo"]]); + + test_find_words!(ascii_two_words, ["foo bar", ["foo ", "bar"]]); + + test_find_words!( + ascii_multiple_words, + ["foo bar", ["foo ", "bar"]], + ["x y z", ["x ", "y ", "z"]] + ); + + test_find_words!(ascii_only_whitespace, [" ", [" "]], [" ", [" "]]); + + test_find_words!( + ascii_inter_word_whitespace, + ["foo bar", ["foo ", "bar"]] + ); + + test_find_words!(ascii_trailing_whitespace, ["foo ", ["foo "]]); + + test_find_words!(ascii_leading_whitespace, [" foo", [" ", "foo"]]); + + test_find_words!( + ascii_multi_column_char, + ["\u{1f920}", ["\u{1f920}"]] // cowboy emoji 🤠 + ); + + test_find_words!( + ascii_hyphens, + ["foo-bar", ["foo-bar"]], + ["foo- bar", ["foo- ", "bar"]], + ["foo - bar", ["foo ", "- ", "bar"]], + ["foo -bar", ["foo ", "-bar"]] + ); + + test_find_words!(ascii_newline, ["foo\nbar", ["foo\nbar"]]); + + test_find_words!(ascii_tab, ["foo\tbar", ["foo\tbar"]]); + + test_find_words!( + ascii_non_breaking_space, + ["foo\u{00A0}bar", ["foo\u{00A0}bar"]] + ); +} diff --git a/src/output/textwrap/wrap_algorithms.rs b/src/output/textwrap/wrap_algorithms.rs new file mode 100644 index 00000000000..019cc04ffec --- /dev/null +++ b/src/output/textwrap/wrap_algorithms.rs @@ -0,0 +1,44 @@ +use super::core::display_width; + +#[derive(Debug)] +pub(crate) struct LineWrapper { + line_width: usize, + hard_width: usize, +} + +impl LineWrapper { + pub(crate) fn new(hard_width: usize) -> Self { + Self { + line_width: 0, + hard_width, + } + } + + pub(crate) fn reset(&mut self) { + self.line_width = 0; + } + + pub(crate) fn wrap<'w>(&mut self, mut words: Vec<&'w str>) -> Vec<&'w str> { + let mut i = 0; + while i < words.len() { + let word = &words[i]; + let trimmed = word.trim_end(); + let word_width = display_width(trimmed); + let trimmed_delta = word.len() - trimmed.len(); + if i != 0 && self.hard_width < self.line_width + word_width { + if 0 < i { + let last = i - 1; + let trimmed = words[last].trim_end(); + words[last] = trimmed; + } + words.insert(i, "\n"); + i += 1; + self.reset(); + } + self.line_width += word_width + trimmed_delta; + + i += 1; + } + words + } +}