diff --git a/src/builder/arg.rs b/src/builder/arg.rs index 07ece27ac57..437a03efe02 100644 --- a/src/builder/arg.rs +++ b/src/builder/arg.rs @@ -63,6 +63,7 @@ pub struct Arg<'help> { pub(crate) name: &'help str, pub(crate) help: Option<&'help str>, pub(crate) long_help: Option<&'help str>, + pub(crate) value_parser: Option, pub(crate) blacklist: Vec, pub(crate) settings: ArgFlags, pub(crate) overrides: Vec, @@ -4737,6 +4738,17 @@ impl<'help> Arg<'help> { self.is_set(ArgSettings::AllowInvalidUtf8) } + /// Configured parser for argument values + pub(crate) fn get_value_parser(&self) -> &super::ValueParser { + if let Some(value_parser) = self.value_parser.as_ref() { + value_parser + } else if self.is_allow_invalid_utf8_set() { + &super::ValueParser(super::ValueParserInner::OsString) + } else { + &super::ValueParser(super::ValueParserInner::String) + } + } + /// Report whether [`Arg::global`] is set pub fn is_global_set(&self) -> bool { self.is_set(ArgSettings::Global) @@ -5039,6 +5051,14 @@ impl<'help> Arg<'help> { self.settings.set(ArgSettings::TakesValue); } + if self.value_parser.is_none() { + if self.is_allow_invalid_utf8_set() { + self.value_parser = Some(super::ValueParser::os_string()); + } else { + self.value_parser = Some(super::ValueParser::string()); + } + } + if (self.is_use_value_delimiter_set() || self.is_require_value_delimiter_set()) && self.val_delim.is_none() { diff --git a/src/builder/command.rs b/src/builder/command.rs index fc806a40bd0..23b2ee930af 100644 --- a/src/builder/command.rs +++ b/src/builder/command.rs @@ -3678,6 +3678,17 @@ impl<'help> App<'help> { self.is_set(AppSettings::AllowInvalidUtf8ForExternalSubcommands) } + /// Configured parser for values passed to an external subcommand + pub(crate) fn get_external_subcommand_value_parser(&self) -> Option<&super::ValueParser> { + if !self.is_allow_external_subcommands_set() { + None + } else if self.is_allow_invalid_utf8_for_external_subcommands_set() { + Some(&super::ValueParser(super::ValueParserInner::OsString)) + } else { + Some(&super::ValueParser(super::ValueParserInner::String)) + } + } + /// Report whether [`Command::args_conflicts_with_subcommands`] is set pub fn is_args_conflicts_with_subcommands_set(&self) -> bool { self.is_set(AppSettings::ArgsNegateSubcommands) diff --git a/src/builder/mod.rs b/src/builder/mod.rs index b38f95e1e11..2394ffcfae2 100644 --- a/src/builder/mod.rs +++ b/src/builder/mod.rs @@ -12,6 +12,7 @@ mod command; mod possible_value; mod usage_parser; mod value_hint; +mod value_parser; #[cfg(feature = "regex")] mod regex; @@ -29,6 +30,7 @@ pub use arg_settings::{ArgFlags, ArgSettings}; pub use command::Command; pub use possible_value::PossibleValue; pub use value_hint::ValueHint; +pub(crate) use value_parser::ValueParser; #[allow(deprecated)] pub use command::App; @@ -38,3 +40,4 @@ pub use self::regex::RegexRef; pub(crate) use arg::display_arg_val; pub(crate) use arg_predicate::ArgPredicate; +pub(crate) use value_parser::ValueParserInner; diff --git a/src/builder/value_parser.rs b/src/builder/value_parser.rs new file mode 100644 index 00000000000..8b3894203eb --- /dev/null +++ b/src/builder/value_parser.rs @@ -0,0 +1,59 @@ +use std::sync::Arc; + +use crate::parser::AnyValue; + +/// Parse/validate argument values +#[derive(Clone)] +pub struct ValueParser(pub(crate) ValueParserInner); + +#[derive(Clone)] +pub(crate) enum ValueParserInner { + String, + OsString, +} + +impl ValueParser { + /// `String` parser for argument values + pub const fn string() -> Self { + Self(ValueParserInner::String) + } + + /// `OsString` parser for argument values + pub const fn os_string() -> Self { + Self(ValueParserInner::OsString) + } +} + +impl ValueParser { + /// Parse into a `Arc` + /// + /// When `arg` is `None`, an external subcommand value is being parsed. + pub fn parse_ref( + &self, + cmd: &crate::Command, + _arg: Option<&crate::Arg>, + value: &std::ffi::OsStr, + ) -> Result { + match &self.0 { + ValueParserInner::String => { + let value = value.to_str().ok_or_else(|| { + crate::Error::invalid_utf8( + cmd, + crate::output::Usage::new(cmd).create_usage_with_title(&[]), + ) + })?; + Ok(Arc::new(value.to_owned())) + } + ValueParserInner::OsString => Ok(Arc::new(value.to_owned())), + } + } +} + +impl<'help> std::fmt::Debug for ValueParser { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + match &self.0 { + ValueParserInner::String => f.debug_struct("ValueParser::string").finish(), + ValueParserInner::OsString => f.debug_struct("ValueParser::os_string").finish(), + } + } +} diff --git a/src/parser/arg_matcher.rs b/src/parser/arg_matcher.rs index 167e6e0ef51..ebc576b36a1 100644 --- a/src/parser/arg_matcher.rs +++ b/src/parser/arg_matcher.rs @@ -6,6 +6,7 @@ use std::ops::Deref; // Internal use crate::builder::{Arg, ArgPredicate, Command}; +use crate::parser::AnyValue; use crate::parser::{ArgMatches, MatchedArg, SubCommand, ValueSource}; use crate::util::Id; @@ -90,11 +91,6 @@ impl ArgMatcher { } } - #[cfg(not(feature = "unstable-v4"))] - pub(crate) fn get_mut(&mut self, arg: &Id) -> Option<&mut MatchedArg> { - self.0.args.get_mut(arg) - } - pub(crate) fn get(&self, arg: &Id) -> Option<&MatchedArg> { self.0.args.get(arg) } @@ -137,7 +133,6 @@ impl ArgMatcher { let ma = self.entry(id).or_insert(MatchedArg::new()); ma.update_ty(ValueSource::CommandLine); ma.set_ignore_case(arg.is_ignore_case_set()); - ma.invalid_utf8_allowed(arg.is_allow_invalid_utf8_set()); ma.inc_occurrences(); } @@ -149,39 +144,42 @@ impl ArgMatcher { } #[cfg(feature = "unstable-v4")] - pub(crate) fn inc_occurrence_of_external(&mut self, allow_invalid_utf8: bool) { + pub(crate) fn inc_occurrence_of_external(&mut self) { let id = &Id::empty_hash(); - debug!( - "ArgMatcher::inc_occurrence_of_external: id={:?}, allow_invalid_utf8={}", - id, allow_invalid_utf8 - ); + debug!("ArgMatcher::inc_occurrence_of_external: id={:?}", id,); let ma = self.entry(id).or_insert(MatchedArg::new()); ma.update_ty(ValueSource::CommandLine); - ma.invalid_utf8_allowed(allow_invalid_utf8); ma.inc_occurrences(); } - pub(crate) fn add_val_to(&mut self, arg: &Id, val: OsString, ty: ValueSource, append: bool) { + pub(crate) fn add_val_to( + &mut self, + arg: &Id, + val: AnyValue, + raw_val: OsString, + ty: ValueSource, + append: bool, + ) { if append { - self.append_val_to(arg, val, ty); + self.append_val_to(arg, val, raw_val, ty); } else { - self.push_val_to(arg, val, ty); + self.push_val_to(arg, val, raw_val, ty); } } - fn push_val_to(&mut self, arg: &Id, val: OsString, ty: ValueSource) { + fn push_val_to(&mut self, arg: &Id, val: AnyValue, raw_val: OsString, ty: ValueSource) { // We will manually inc occurrences later(for flexibility under // specific circumstances, like only add one occurrence for flag // when we met: `--flag=one,two`). let ma = self.entry(arg).or_default(); ma.update_ty(ty); - ma.push_val(val); + ma.push_val(val, raw_val); } - fn append_val_to(&mut self, arg: &Id, val: OsString, ty: ValueSource) { + fn append_val_to(&mut self, arg: &Id, val: AnyValue, raw_val: OsString, ty: ValueSource) { let ma = self.entry(arg).or_default(); ma.update_ty(ty); - ma.append_val(val); + ma.append_val(val, raw_val); } pub(crate) fn new_val_group(&mut self, arg: &Id) { diff --git a/src/parser/matches/arg_matches.rs b/src/parser/matches/arg_matches.rs index 50757e77f26..7b8b3a37579 100644 --- a/src/parser/matches/arg_matches.rs +++ b/src/parser/matches/arg_matches.rs @@ -10,10 +10,11 @@ use std::str::FromStr; use indexmap::IndexMap; // Internal +use crate::parser::AnyValue; use crate::parser::MatchedArg; use crate::parser::ValueSource; use crate::util::{Id, Key}; -use crate::{Error, INVALID_UTF8}; +use crate::Error; /// Container for parse results. /// @@ -140,9 +141,8 @@ impl ArgMatches { pub fn value_of(&self, id: T) -> Option<&str> { let id = Id::from(id); let arg = self.get_arg(&id)?; - assert_utf8_validation(arg, &id); - let v = arg.first()?; - Some(v.to_str().expect(INVALID_UTF8)) + let v = unwrap_string_arg(&id, arg.first()?); + Some(v) } /// Gets the lossy value of a specific option or positional argument. @@ -190,8 +190,7 @@ impl ArgMatches { pub fn value_of_lossy(&self, id: T) -> Option> { let id = Id::from(id); let arg = self.get_arg(&id)?; - assert_no_utf8_validation(arg, &id); - let v = arg.first()?; + let v = unwrap_os_string_arg(&id, arg.first()?); Some(v.to_string_lossy()) } @@ -241,9 +240,8 @@ impl ArgMatches { pub fn value_of_os(&self, id: T) -> Option<&OsStr> { let id = Id::from(id); let arg = self.get_arg(&id)?; - assert_no_utf8_validation(arg, &id); - let v = arg.first()?; - Some(v.as_os_str()) + let v = unwrap_os_string_arg(&id, arg.first()?); + Some(v) } /// Get an [`Iterator`] over [values] of a specific option or positional argument. @@ -280,12 +278,8 @@ impl ArgMatches { pub fn values_of(&self, id: T) -> Option { let id = Id::from(id); let arg = self.get_arg(&id)?; - assert_utf8_validation(arg, &id); - fn to_str_slice(o: &OsString) -> &str { - o.to_str().expect(INVALID_UTF8) - } let v = Values { - iter: arg.vals_flatten().map(to_str_slice), + iter: arg.vals_flatten().map(unwrap_string), len: arg.num_vals(), }; Some(v) @@ -329,11 +323,8 @@ impl ArgMatches { pub fn grouped_values_of(&self, id: T) -> Option { let id = Id::from(id); let arg = self.get_arg(&id)?; - assert_utf8_validation(arg, &id); let v = GroupedValues { - iter: arg - .vals() - .map(|g| g.iter().map(|x| x.to_str().expect(INVALID_UTF8)).collect()), + iter: arg.vals().map(|g| g.iter().map(unwrap_string).collect()), len: arg.vals().len(), }; Some(v) @@ -379,10 +370,9 @@ impl ArgMatches { pub fn values_of_lossy(&self, id: T) -> Option> { let id = Id::from(id); let arg = self.get_arg(&id)?; - assert_no_utf8_validation(arg, &id); let v = arg .vals_flatten() - .map(|v| v.to_string_lossy().into_owned()) + .map(|v| unwrap_os_string_arg(&id, v).to_string_lossy().into_owned()) .collect(); Some(v) } @@ -433,12 +423,8 @@ impl ArgMatches { pub fn values_of_os(&self, id: T) -> Option { let id = Id::from(id); let arg = self.get_arg(&id)?; - assert_no_utf8_validation(arg, &id); - fn to_str_slice(o: &OsString) -> &OsStr { - o - } let v = OsValues { - iter: arg.vals_flatten().map(to_str_slice), + iter: arg.vals_flatten().map(unwrap_os_string), len: arg.num_vals(), }; Some(v) @@ -1215,7 +1201,7 @@ pub(crate) struct SubCommand { #[derive(Clone, Debug)] pub struct Values<'a> { #[allow(clippy::type_complexity)] - iter: Map>>, for<'r> fn(&'r OsString) -> &'r str>, + iter: Map>>, for<'r> fn(&'r AnyValue) -> &'r str>, len: usize, } @@ -1241,7 +1227,7 @@ impl<'a> ExactSizeIterator for Values<'a> {} /// Creates an empty iterator. impl<'a> Default for Values<'a> { fn default() -> Self { - static EMPTY: [Vec; 0] = []; + static EMPTY: [Vec; 0] = []; Values { iter: EMPTY[..].iter().flatten().map(|_| unreachable!()), len: 0, @@ -1253,7 +1239,7 @@ impl<'a> Default for Values<'a> { #[allow(missing_debug_implementations)] pub struct GroupedValues<'a> { #[allow(clippy::type_complexity)] - iter: Map>, fn(&Vec) -> Vec<&str>>, + iter: Map>, fn(&Vec) -> Vec<&str>>, len: usize, } @@ -1279,7 +1265,7 @@ impl<'a> ExactSizeIterator for GroupedValues<'a> {} /// Creates an empty iterator. Used for `unwrap_or_default()`. impl<'a> Default for GroupedValues<'a> { fn default() -> Self { - static EMPTY: [Vec; 0] = []; + static EMPTY: [Vec; 0] = []; GroupedValues { iter: EMPTY[..].iter().map(|_| unreachable!()), len: 0, @@ -1309,7 +1295,7 @@ impl<'a> Default for GroupedValues<'a> { #[derive(Clone, Debug)] pub struct OsValues<'a> { #[allow(clippy::type_complexity)] - iter: Map>>, fn(&OsString) -> &OsStr>, + iter: Map>>, fn(&AnyValue) -> &OsStr>, len: usize, } @@ -1335,7 +1321,7 @@ impl<'a> ExactSizeIterator for OsValues<'a> {} /// Creates an empty iterator. impl Default for OsValues<'_> { fn default() -> Self { - static EMPTY: [Vec; 0] = []; + static EMPTY: [Vec; 0] = []; OsValues { iter: EMPTY[..].iter().flatten().map(|_| unreachable!()), len: 0, @@ -1402,22 +1388,52 @@ impl<'a> Default for Indices<'a> { #[cfg_attr(debug_assertions, track_caller)] #[inline] -fn assert_utf8_validation(arg: &MatchedArg, id: &Id) { - debug_assert!( - matches!(arg.is_invalid_utf8_allowed(), None | Some(false)), - "Must use `_os` lookups with `Arg::allow_invalid_utf8` at `{:?}`", - id - ); +fn unwrap_string(value: &AnyValue) -> &str { + match value.downcast_ref::() { + Some(value) => value, + None => { + panic!("Must use `_os` lookups with `Arg::allow_invalid_utf8`",) + } + } +} + +#[cfg_attr(debug_assertions, track_caller)] +#[inline] +fn unwrap_string_arg<'v>(id: &Id, value: &'v AnyValue) -> &'v str { + match value.downcast_ref::() { + Some(value) => value, + None => { + panic!( + "Must use `_os` lookups with `Arg::allow_invalid_utf8` at `{:?}`", + id + ) + } + } +} + +#[cfg_attr(debug_assertions, track_caller)] +#[inline] +fn unwrap_os_string(value: &AnyValue) -> &OsStr { + match value.downcast_ref::() { + Some(value) => value, + None => { + panic!("Must use `Arg::allow_invalid_utf8` with `_os` lookups",) + } + } } #[cfg_attr(debug_assertions, track_caller)] #[inline] -fn assert_no_utf8_validation(arg: &MatchedArg, id: &Id) { - debug_assert!( - matches!(arg.is_invalid_utf8_allowed(), None | Some(true)), - "Must use `Arg::allow_invalid_utf8` with `_os` lookups at `{:?}`", - id - ); +fn unwrap_os_string_arg<'v>(id: &Id, value: &'v AnyValue) -> &'v OsStr { + match value.downcast_ref::() { + Some(value) => value, + None => { + panic!( + "Must use `Arg::allow_invalid_utf8` with `_os` lookups at `{:?}`", + id + ) + } + } } #[cfg(test)] diff --git a/src/parser/matches/matched_arg.rs b/src/parser/matches/matched_arg.rs index aebef68f1c3..15f12433666 100644 --- a/src/parser/matches/matched_arg.rs +++ b/src/parser/matches/matched_arg.rs @@ -6,18 +6,19 @@ use std::{ }; use crate::builder::ArgPredicate; +use crate::parser::AnyValue; use crate::parser::ValueSource; use crate::util::eq_ignore_case; use crate::INTERNAL_ERROR_MSG; -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone)] pub(crate) struct MatchedArg { occurs: u64, ty: Option, indices: Vec, - vals: Vec>, + vals: Vec>, + raw_vals: Vec>, ignore_case: bool, - invalid_utf8_allowed: Option, } impl MatchedArg { @@ -27,8 +28,8 @@ impl MatchedArg { ty: None, indices: Vec::new(), vals: Vec::new(), + raw_vals: Vec::new(), ignore_case: false, - invalid_utf8_allowed: None, } } @@ -52,30 +53,50 @@ impl MatchedArg { self.indices.push(index) } + #[cfg(test)] + pub(crate) fn raw_vals(&self) -> Iter> { + self.raw_vals.iter() + } + #[cfg(feature = "unstable-grouped")] - pub(crate) fn vals(&self) -> Iter> { + pub(crate) fn vals(&self) -> Iter> { self.vals.iter() } - pub(crate) fn vals_flatten(&self) -> Flatten>> { + pub(crate) fn vals_flatten(&self) -> Flatten>> { self.vals.iter().flatten() } - pub(crate) fn first(&self) -> Option<&OsString> { + pub(crate) fn raw_vals_flatten(&self) -> Flatten>> { + self.raw_vals.iter().flatten() + } + + pub(crate) fn first(&self) -> Option<&AnyValue> { self.vals_flatten().next() } - pub(crate) fn push_val(&mut self, val: OsString) { - self.vals.push(vec![val]) + #[cfg(test)] + pub(crate) fn first_raw(&self) -> Option<&OsString> { + self.raw_vals_flatten().next() + } + + pub(crate) fn push_val(&mut self, val: AnyValue, raw_val: OsString) { + self.vals.push(vec![val]); + self.raw_vals.push(vec![raw_val]); } pub(crate) fn new_val_group(&mut self) { - self.vals.push(vec![]) + self.vals.push(vec![]); + self.raw_vals.push(vec![]); } - pub(crate) fn append_val(&mut self, val: OsString) { + pub(crate) fn append_val(&mut self, val: AnyValue, raw_val: OsString) { // We assume there is always a group created before. - self.vals.last_mut().expect(INTERNAL_ERROR_MSG).push(val) + self.vals.last_mut().expect(INTERNAL_ERROR_MSG).push(val); + self.raw_vals + .last_mut() + .expect(INTERNAL_ERROR_MSG) + .push(raw_val); } pub(crate) fn num_vals(&self) -> usize { @@ -102,7 +123,7 @@ impl MatchedArg { } match predicate { - ArgPredicate::Equals(val) => self.vals_flatten().any(|v| { + ArgPredicate::Equals(val) => self.raw_vals_flatten().any(|v| { if self.ignore_case { // If `v` isn't utf8, it can't match `val`, so `OsStr::to_str` should be fine eq_ignore_case(&v.to_string_lossy(), &val.to_string_lossy()) @@ -129,14 +150,6 @@ impl MatchedArg { pub(crate) fn set_ignore_case(&mut self, yes: bool) { self.ignore_case = yes; } - - pub(crate) fn invalid_utf8_allowed(&mut self, yes: bool) { - self.invalid_utf8_allowed = Some(yes); - } - - pub(crate) fn is_invalid_utf8_allowed(&self) -> Option { - self.invalid_utf8_allowed - } } impl Default for MatchedArg { @@ -145,37 +158,67 @@ impl Default for MatchedArg { } } +impl PartialEq for MatchedArg { + fn eq(&self, other: &MatchedArg) -> bool { + let MatchedArg { + occurs: self_occurs, + ty: self_ty, + indices: self_indices, + vals: _, + raw_vals: self_raw_vals, + ignore_case: self_ignore_case, + } = self; + let MatchedArg { + occurs: other_occurs, + ty: other_ty, + indices: other_indices, + vals: _, + raw_vals: other_raw_vals, + ignore_case: other_ignore_case, + } = other; + self_occurs == other_occurs + && self_ty == other_ty + && self_indices == other_indices + && self_raw_vals == other_raw_vals + && self_ignore_case == other_ignore_case + } +} + +impl Eq for MatchedArg {} + #[cfg(test)] mod tests { use super::*; + use std::sync::Arc; + #[test] fn test_grouped_vals_first() { let mut m = MatchedArg::new(); m.new_val_group(); m.new_val_group(); - m.append_val("bbb".into()); - m.append_val("ccc".into()); - assert_eq!(m.first(), Some(&OsString::from("bbb"))); + m.append_val(Arc::new(String::from("bbb")), "bbb".into()); + m.append_val(Arc::new(String::from("ccc")), "ccc".into()); + assert_eq!(m.first_raw(), Some(&OsString::from("bbb"))); } #[test] fn test_grouped_vals_push_and_append() { let mut m = MatchedArg::new(); - m.push_val("aaa".into()); + m.push_val(Arc::new(String::from("aaa")), "aaa".into()); m.new_val_group(); - m.append_val("bbb".into()); - m.append_val("ccc".into()); + m.append_val(Arc::new(String::from("bbb")), "bbb".into()); + m.append_val(Arc::new(String::from("ccc")), "ccc".into()); m.new_val_group(); - m.append_val("ddd".into()); - m.push_val("eee".into()); + m.append_val(Arc::new(String::from("ddd")), "ddd".into()); + m.push_val(Arc::new(String::from("eee")), "eee".into()); m.new_val_group(); - m.append_val("fff".into()); - m.append_val("ggg".into()); - m.append_val("hhh".into()); - m.append_val("iii".into()); + m.append_val(Arc::new(String::from("fff")), "fff".into()); + m.append_val(Arc::new(String::from("ggg")), "ggg".into()); + m.append_val(Arc::new(String::from("hhh")), "hhh".into()); + m.append_val(Arc::new(String::from("iii")), "iii".into()); - let vals: Vec<&Vec> = m.vals().collect(); + let vals: Vec<&Vec> = m.raw_vals().collect(); assert_eq!( vals, vec![ diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1bfc48c8bbf..7bd57b9b91b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -14,3 +14,5 @@ pub(crate) use self::parser::{ParseState, Parser}; pub(crate) use self::validator::Validator; pub use self::matches::{ArgMatches, Indices, OsValues, ValueSource, Values}; + +pub(crate) type AnyValue = std::sync::Arc; diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 76895a4480b..1d0e9bd6e71 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -208,7 +208,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { &parse_state, &mut valid_arg_found, trailing_values, - ); + )?; debug!( "Parser::get_matches_with: After parse_long_arg {:?}", parse_result @@ -281,7 +281,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { pos_counter, &mut valid_arg_found, trailing_values, - ); + )?; // If it's None, we then check if one of those two AppSettings was set debug!( "Parser::get_matches_with: After parse_short_arg {:?}", @@ -363,7 +363,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { ValueSource::CommandLine, true, trailing_values, - ); + )?; parse_state = match parse_result { ParseResult::Opt(id) => ParseState::Opt(id), ParseResult::ValuesDone => ParseState::ValuesDone, @@ -404,7 +404,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { ValueSource::CommandLine, append, trailing_values, - ); + )?; // Only increment the positional counter if it doesn't allow multiples if !p.is_multiple() { @@ -414,7 +414,9 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { parse_state = ParseState::Pos(p.id.clone()); } valid_arg_found = true; - } else if self.cmd.is_allow_external_subcommands_set() { + } else if let Some(external_parser) = + self.cmd.get_external_subcommand_value_parser().cloned() + { // Get external subcommand name let sc_name = match arg_os.to_value() { Ok(s) => s.to_string(), @@ -428,34 +430,21 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { // Collect the external subcommand args let mut sc_m = ArgMatcher::new(self.cmd); - let allow_invalid_utf8 = self - .cmd - .is_allow_invalid_utf8_for_external_subcommands_set(); #[cfg(feature = "unstable-v4")] { - sc_m.inc_occurrence_of_external(allow_invalid_utf8); + sc_m.inc_occurrence_of_external(); } - for v in raw_args.remaining(&mut args_cursor) { - if !allow_invalid_utf8 && v.to_str().is_none() { - return Err(ClapError::invalid_utf8( - self.cmd, - Usage::new(self.cmd).create_usage_with_title(&[]), - )); - } + for raw_val in raw_args.remaining(&mut args_cursor) { + let val = external_parser.parse_ref(self.cmd, None, raw_val)?; let external_id = &Id::empty_hash(); sc_m.add_val_to( external_id, - v.to_os_string(), + val, + raw_val.to_os_string(), ValueSource::CommandLine, false, ); - #[cfg(not(feature = "unstable-v4"))] - { - sc_m.get_mut(external_id) - .expect("just inserted") - .invalid_utf8_allowed(allow_invalid_utf8); - } } matcher.subcommand(SubCommand { @@ -466,7 +455,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { #[cfg(feature = "env")] self.add_env(matcher, trailing_values)?; - self.add_defaults(matcher, trailing_values); + self.add_defaults(matcher, trailing_values)?; return Validator::new(self.cmd).validate(parse_state, matcher); } else { // Start error processing @@ -486,7 +475,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { #[cfg(feature = "env")] self.add_env(matcher, trailing_values)?; - self.add_defaults(matcher, trailing_values); + self.add_defaults(matcher, trailing_values)?; Validator::new(self.cmd).validate(parse_state, matcher) } @@ -800,14 +789,14 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { parse_state: &ParseState, valid_arg_found: &mut bool, trailing_values: bool, - ) -> ParseResult { + ) -> ClapResult { // maybe here lifetime should be 'a debug!("Parser::parse_long_arg"); if matches!(parse_state, ParseState::Opt(opt) | ParseState::Pos(opt) if self.cmd[opt].is_allow_hyphen_values_set()) { - return ParseResult::MaybeHyphenValue; + return Ok(ParseResult::MaybeHyphenValue); } // Update the current index @@ -818,14 +807,14 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { let long_arg = match long_arg { Ok(long_arg) => long_arg, Err(long_arg) => { - return ParseResult::NoMatchingArg { + return Ok(ParseResult::NoMatchingArg { arg: long_arg.to_str_lossy().into_owned(), - }; + }); } }; if long_arg.is_empty() { debug_assert!(long_value.is_none(), "{:?}", long_value); - return ParseResult::NoArg; + return Ok(ParseResult::NoArg); } let opt = if let Some(opt) = self.cmd.get_keymap().get(long_arg) { @@ -868,27 +857,27 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { .cloned() .collect(); - ParseResult::UnneededAttachedValue { + Ok(ParseResult::UnneededAttachedValue { rest: rest.to_str_lossy().into_owned(), used, arg: opt.to_string(), - } + }) } else if let Some(parse_result) = self.check_for_help_and_version_str(RawOsStr::from_str(long_arg)) { - parse_result + Ok(parse_result) } else { debug!("Parser::parse_long_arg: Presence validated"); - self.parse_flag(opt, matcher) + Ok(self.parse_flag(opt, matcher)) } } else if let Some(sc_name) = self.possible_long_flag_subcommand(long_arg) { - ParseResult::FlagSubCommand(sc_name.to_string()) + Ok(ParseResult::FlagSubCommand(sc_name.to_string())) } else if self.cmd.is_allow_hyphen_values_set() { - ParseResult::MaybeHyphenValue + Ok(ParseResult::MaybeHyphenValue) } else { - ParseResult::NoMatchingArg { + Ok(ParseResult::NoMatchingArg { arg: long_arg.to_owned(), - } + }) } } @@ -901,25 +890,25 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { pos_counter: usize, valid_arg_found: &mut bool, trailing_values: bool, - ) -> ParseResult { + ) -> ClapResult { debug!("Parser::parse_short_arg: short_arg={:?}", short_arg); #[allow(clippy::blocks_in_if_conditions)] if self.cmd.is_allow_negative_numbers_set() && short_arg.is_number() { debug!("Parser::parse_short_arg: negative number"); - return ParseResult::MaybeHyphenValue; + return Ok(ParseResult::MaybeHyphenValue); } else if self.cmd.is_allow_hyphen_values_set() && short_arg .clone() .any(|c| !c.map(|c| self.cmd.contains_short(c)).unwrap_or_default()) { debug!("Parser::parse_short_args: contains non-short flag"); - return ParseResult::MaybeHyphenValue; + return Ok(ParseResult::MaybeHyphenValue); } else if matches!(parse_state, ParseState::Opt(opt) | ParseState::Pos(opt) if self.cmd[opt].is_allow_hyphen_values_set()) { debug!("Parser::parse_short_args: prior arg accepts hyphenated values",); - return ParseResult::MaybeHyphenValue; + return Ok(ParseResult::MaybeHyphenValue); } else if self .cmd .get_keymap() @@ -932,7 +921,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { "Parser::parse_short_args: positional at {} allows hyphens", pos_counter ); - return ParseResult::MaybeHyphenValue; + return Ok(ParseResult::MaybeHyphenValue); } let mut ret = ParseResult::NoArg; @@ -950,9 +939,9 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { let c = match c { Ok(c) => c, Err(rest) => { - return ParseResult::NoMatchingArg { + return Ok(ParseResult::NoMatchingArg { arg: format!("-{}", rest.to_str_lossy()), - }; + }); } }; debug!("Parser::parse_short_arg:iter:{}", c); @@ -978,7 +967,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { self.seen.push(opt.id.clone()); if !opt.is_takes_value_set() { if let Some(parse_result) = self.check_for_help_and_version_char(c) { - return parse_result; + return Ok(parse_result); } ret = self.parse_flag(opt, matcher); continue; @@ -1006,9 +995,9 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { } else { (val, false) }; - match self.parse_opt(val, opt, matcher, trailing_values, has_eq) { + match self.parse_opt(val, opt, matcher, trailing_values, has_eq)? { ParseResult::AttachedValueNotConsumed => continue, - x => return x, + x => return Ok(x), } } @@ -1024,14 +1013,14 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { if done_short_args { self.flag_subcmd_at = None; } - ParseResult::FlagSubCommand(name) + Ok(ParseResult::FlagSubCommand(name)) } else { - ParseResult::NoMatchingArg { + Ok(ParseResult::NoMatchingArg { arg: format!("-{}", c), - } + }) }; } - ret + Ok(ret) } fn parse_opt( @@ -1041,7 +1030,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { matcher: &mut ArgMatcher, trailing_values: bool, has_eq: bool, - ) -> ParseResult { + ) -> ClapResult { debug!( "Parser::parse_opt; opt={}, val={:?}, has_eq={:?}", opt.name, attached_value, has_eq @@ -1063,18 +1052,18 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { matcher, ValueSource::CommandLine, false, - ); + )?; }; if attached_value.is_some() { - ParseResult::AttachedValueNotConsumed + Ok(ParseResult::AttachedValueNotConsumed) } else { - ParseResult::ValuesDone + Ok(ParseResult::ValuesDone) } } else { debug!("Requires equals but not provided. Error."); - ParseResult::EqualsNotProvided { + Ok(ParseResult::EqualsNotProvided { arg: opt.to_string(), - } + }) } } else if let Some(v) = attached_value { self.inc_occurrence_of_arg(matcher, opt); @@ -1085,8 +1074,8 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { ValueSource::CommandLine, false, trailing_values, - ); - ParseResult::ValuesDone + )?; + Ok(ParseResult::ValuesDone) } else { debug!("Parser::parse_opt: More arg vals required..."); self.inc_occurrence_of_arg(matcher, opt); @@ -1094,7 +1083,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { for group in self.cmd.groups_for_arg(&opt.id) { matcher.new_val_group(&group); } - ParseResult::Opt(opt.id.clone()) + Ok(ParseResult::Opt(opt.id.clone())) } } @@ -1106,7 +1095,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { ty: ValueSource, append: bool, trailing_values: bool, - ) -> ParseResult { + ) -> ClapResult { debug!("Parser::add_val_to_arg; arg={}, val={:?}", arg.name, val); debug!( "Parser::add_val_to_arg; trailing_values={:?}, DontDelimTrailingVals={:?}", @@ -1120,7 +1109,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { .split(delim) .map(|x| x.to_os_str().into_owned()) .take_while(|val| Some(val.as_os_str()) != terminator); - self.add_multiple_vals_to_arg(arg, vals, matcher, ty, append); + self.add_multiple_vals_to_arg(arg, vals, matcher, ty, append)?; // If there was a delimiter used or we must use the delimiter to // separate the values or no more vals is needed, we're not // looking for more values. @@ -1128,33 +1117,33 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { || arg.is_require_value_delimiter_set() || !matcher.needs_more_vals(arg) { - ParseResult::ValuesDone + Ok(ParseResult::ValuesDone) } else { - ParseResult::Opt(arg.id.clone()) + Ok(ParseResult::Opt(arg.id.clone())) }; } } if let Some(t) = arg.terminator { if t == val { - return ParseResult::ValuesDone; + return Ok(ParseResult::ValuesDone); } } - self.add_single_val_to_arg(arg, val.to_os_str().into_owned(), matcher, ty, append); + self.add_single_val_to_arg(arg, val.to_os_str().into_owned(), matcher, ty, append)?; if matcher.needs_more_vals(arg) { - ParseResult::Opt(arg.id.clone()) + Ok(ParseResult::Opt(arg.id.clone())) } else { - ParseResult::ValuesDone + Ok(ParseResult::ValuesDone) } } fn add_multiple_vals_to_arg( &self, arg: &Arg<'help>, - vals: impl Iterator, + raw_vals: impl Iterator, matcher: &mut ArgMatcher, ty: ValueSource, append: bool, - ) { + ) -> ClapResult<()> { // If not appending, create a new val group and then append vals in. if !append { matcher.new_val_group(&arg.id); @@ -1162,20 +1151,22 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { matcher.new_val_group(&group); } } - for val in vals { - self.add_single_val_to_arg(arg, val, matcher, ty, true); + for raw_val in raw_vals { + self.add_single_val_to_arg(arg, raw_val, matcher, ty, true)?; } + + Ok(()) } fn add_single_val_to_arg( &self, arg: &Arg<'help>, - val: OsString, + raw_val: OsString, matcher: &mut ArgMatcher, ty: ValueSource, append: bool, - ) { - debug!("Parser::add_single_val_to_arg: adding val...{:?}", val); + ) -> ClapResult<()> { + debug!("Parser::add_single_val_to_arg: adding val...{:?}", raw_val); // update the current index because each value is a distinct index to clap self.cur_idx.set(self.cur_idx.get() + 1); @@ -1183,14 +1174,18 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { "Parser::add_single_val_to_arg: cur_idx:={}", self.cur_idx.get() ); + let value_parser = arg.get_value_parser(); + let val = value_parser.parse_ref(self.cmd, Some(arg), &raw_val)?; // Increment or create the group "args" for group in self.cmd.groups_for_arg(&arg.id) { - matcher.add_val_to(&group, val.clone(), ty, append); + matcher.add_val_to(&group, val.clone(), raw_val.clone(), ty, append); } - matcher.add_val_to(&arg.id, val, ty, append); + matcher.add_val_to(&arg.id, val, raw_val, ty, append); matcher.add_index_to(&arg.id, self.cur_idx.get(), ty); + + Ok(()) } fn has_val_groups(&self, matcher: &mut ArgMatcher, arg: &Arg<'help>) -> bool { @@ -1228,18 +1223,24 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { } } - pub(crate) fn add_defaults(&mut self, matcher: &mut ArgMatcher, trailing_values: bool) { + pub(crate) fn add_defaults( + &mut self, + matcher: &mut ArgMatcher, + trailing_values: bool, + ) -> ClapResult<()> { debug!("Parser::add_defaults"); for o in self.cmd.get_opts() { debug!("Parser::add_defaults:iter:{}:", o.name); - self.add_value(o, matcher, ValueSource::DefaultValue, trailing_values); + self.add_value(o, matcher, ValueSource::DefaultValue, trailing_values)?; } for p in self.cmd.get_positionals() { debug!("Parser::add_defaults:iter:{}:", p.name); - self.add_value(p, matcher, ValueSource::DefaultValue, trailing_values); + self.add_value(p, matcher, ValueSource::DefaultValue, trailing_values)?; } + + Ok(()) } fn add_value( @@ -1248,7 +1249,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { matcher: &mut ArgMatcher, ty: ValueSource, trailing_values: bool, - ) { + ) -> ClapResult<()> { if !arg.default_vals_ifs.is_empty() { debug!("Parser::add_value: has conditional defaults"); if matcher.get(&arg.id).is_none() { @@ -1256,7 +1257,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { let add = if let Some(a) = matcher.get(id) { match val { crate::builder::ArgPredicate::Equals(v) => { - a.vals_flatten().any(|value| v == value) + a.raw_vals_flatten().any(|value| v == value) } crate::builder::ArgPredicate::IsPresent => true, } @@ -1273,9 +1274,9 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { ty, false, trailing_values, - ); + )?; } - return; + return Ok(()); } } } @@ -1312,7 +1313,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { matcher, ty, false, - ); + )?; } } else { debug!( @@ -1340,7 +1341,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { matcher, ty, false, - ); + )?; } None => { debug!("Parser::add_value:iter:{}: wasn't used", arg.name); @@ -1359,6 +1360,8 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { // do nothing } + + Ok(()) } #[cfg(feature = "env")] @@ -1396,7 +1399,7 @@ impl<'help, 'cmd> Parser<'help, 'cmd> { ValueSource::EnvVariable, false, trailing_values, - ); + )?; return Ok(()); } diff --git a/src/parser/validator.rs b/src/parser/validator.rs index bc5db7606f5..14304b9fca2 100644 --- a/src/parser/validator.rs +++ b/src/parser/validator.rs @@ -98,19 +98,7 @@ impl<'help, 'cmd> Validator<'help, 'cmd> { matcher: &ArgMatcher, ) -> ClapResult<()> { debug!("Validator::validate_arg_values: arg={:?}", arg.name); - for val in ma.vals_flatten() { - if !arg.is_allow_invalid_utf8_set() && val.to_str().is_none() { - debug!( - "Validator::validate_arg_values: invalid UTF-8 found in val {:?}", - val - ); - return Err(Error::invalid_utf8( - self.cmd, - Usage::new(self.cmd) - .required(&self.required) - .create_usage_with_title(&[]), - )); - } + for val in ma.raw_vals_flatten() { if !arg.possible_vals.is_empty() { debug!( "Validator::validate_arg_values: possible_vals={:?}", @@ -421,7 +409,7 @@ impl<'help, 'cmd> Validator<'help, 'cmd> { debug!("Validator::validate_arg_num_vals: Sending error TooManyValues"); return Err(Error::too_many_values( self.cmd, - ma.vals_flatten() + ma.raw_vals_flatten() .last() .expect(INTERNAL_ERROR_MSG) .to_str() diff --git a/tests/builder/utf8.rs b/tests/builder/utf8.rs index 8e502a71258..ece5614708b 100644 --- a/tests/builder/utf8.rs +++ b/tests/builder/utf8.rs @@ -404,9 +404,8 @@ fn panic_validated_utf8_value_of_os() { } #[test] -fn ignore_validated_utf8_with_defaults() { - // For now, we don't check the correct call is used with defaults (due to pain of piping it - // through the code) but we need to make sure we don't erroneously panic. +#[should_panic = "Must use `Arg::allow_invalid_utf8` with `_os` lookups at `value`"] +fn panic_validated_utf8_with_defaults() { let a = Command::new("test").arg(arg!(--value ).required(false).default_value("foo")); let m = a.try_get_matches_from(["test"]).unwrap(); let _ = m.value_of("value"); @@ -429,9 +428,8 @@ fn panic_invalid_utf8_value_of() { } #[test] -fn ignore_invalid_utf8_with_defaults() { - // For now, we don't check the correct call is used with defaults (due to pain of piping it - // through the code) but we need to make sure we don't erroneously panic. +#[should_panic = "Must use `_os` lookups with `Arg::allow_invalid_utf8` at `value`"] +fn panic_invalid_utf8_with_defaults() { let a = Command::new("test").arg( arg!(--value ) .required(false) @@ -439,8 +437,8 @@ fn ignore_invalid_utf8_with_defaults() { .allow_invalid_utf8(true), ); let m = a.try_get_matches_from(["test"]).unwrap(); - let _ = m.value_of("value"); let _ = m.value_of_os("value"); + let _ = m.value_of("value"); } #[test] @@ -448,16 +446,16 @@ fn allow_validated_utf8_external_subcommand_values_of() { let a = Command::new("test").allow_external_subcommands(true); let m = a.try_get_matches_from(vec!["test", "cmd", "arg"]).unwrap(); let (_ext, args) = m.subcommand().unwrap(); - let _ = args.values_of(""); + args.values_of("").unwrap_or_default().count(); } #[test] -#[should_panic = "Must use `Arg::allow_invalid_utf8` with `_os` lookups at ``"] +#[should_panic = "Must use `Arg::allow_invalid_utf8` with `_os` lookups"] fn panic_validated_utf8_external_subcommand_values_of_os() { let a = Command::new("test").allow_external_subcommands(true); let m = a.try_get_matches_from(vec!["test", "cmd", "arg"]).unwrap(); let (_ext, args) = m.subcommand().unwrap(); - let _ = args.values_of_os(""); + args.values_of_os("").unwrap_or_default().count(); } #[test] @@ -467,16 +465,16 @@ fn allow_invalid_utf8_external_subcommand_values_of_os() { .allow_invalid_utf8_for_external_subcommands(true); let m = a.try_get_matches_from(vec!["test", "cmd", "arg"]).unwrap(); let (_ext, args) = m.subcommand().unwrap(); - let _ = args.values_of_os(""); + args.values_of_os("").unwrap_or_default().count(); } #[test] -#[should_panic = "Must use `_os` lookups with `Arg::allow_invalid_utf8` at ``"] +#[should_panic = "Must use `_os` lookups with `Arg::allow_invalid_utf8`"] fn panic_invalid_utf8_external_subcommand_values_of() { let a = Command::new("test") .allow_external_subcommands(true) .allow_invalid_utf8_for_external_subcommands(true); let m = a.try_get_matches_from(vec!["test", "cmd", "arg"]).unwrap(); let (_ext, args) = m.subcommand().unwrap(); - let _ = args.values_of(""); + args.values_of("").unwrap_or_default().count(); }