diff --git a/CHANGELOG.md b/CHANGELOG.md index cbdbbc3937a6..5c86caa2e7dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,9 @@ TODO: `cargo`, `std` features * **ErrorKind** * `ErrorKind::MissingArgumentOrSubcommand` => `ErrorKind::DisplayHelpOnMissingArgumentOrSubcommand` * **Changed** + * `AppSettings::StrictUtf8` is now default and it and `AppSettings::AllowInvalidUtf8` are replaced by + * `AppSettings::AllowInvalidUtf8ForExternalSubcommands` + * `ArgSettings::AllowInvalidUtf8` * Allowing empty values is the default again with `ArgSettings::AllowEmptyValues` changing to `ArgSettings::ForbidEmptyValues` * `Arg::env`, `Arg::env_os`, `Arg::last`, `Arg::require_equals`, `Arg::allow_hyphen_values`, diff --git a/clap_derive/src/derives/args.rs b/clap_derive/src/derives/args.rs index ce32b8676a50..2ee42ae3d9de 100644 --- a/clap_derive/src/derives/args.rs +++ b/clap_derive/src/derives/args.rs @@ -228,7 +228,22 @@ pub fn gen_augment( ParserKind::TryFromOsStr => quote_spanned! { func.span()=> .validator_os(|s| #func(s).map(|_: #convert_type| ())) }, - _ => quote!(), + ParserKind::FromStr + | ParserKind::FromOsStr + | ParserKind::FromFlag + | ParserKind::FromOccurrences => quote!(), + }; + let allow_invalid_utf8 = match *parser.kind { + _ if attrs.is_enum() => quote!(), + ParserKind::FromOsStr | ParserKind::TryFromOsStr => { + quote_spanned! { func.span()=> + .allow_invalid_utf8(true) + } + } + ParserKind::FromStr + | ParserKind::TryFromStr + | ParserKind::FromFlag + | ParserKind::FromOccurrences => quote!(), }; let value_name = if attrs.has_method("value_name") { @@ -258,6 +273,7 @@ pub fn gen_augment( #value_name #possible_values #validator + #allow_invalid_utf8 } } @@ -268,6 +284,7 @@ pub fn gen_augment( .max_values(1) .multiple_values(false) #validator + #allow_invalid_utf8 }, Ty::OptionVec => quote_spanned! { ty.span()=> @@ -276,6 +293,7 @@ pub fn gen_augment( .multiple_values(true) .min_values(0) #validator + #allow_invalid_utf8 }, Ty::Vec => { @@ -293,6 +311,7 @@ pub fn gen_augment( .multiple_values(true) #possible_values #validator + #allow_invalid_utf8 } } @@ -319,6 +338,7 @@ pub fn gen_augment( .required(#required) #possible_values #validator + #allow_invalid_utf8 } } }; diff --git a/clap_derive/src/derives/subcommand.rs b/clap_derive/src/derives/subcommand.rs index af0f78546209..8b5642360603 100644 --- a/clap_derive/src/derives/subcommand.rs +++ b/clap_derive/src/derives/subcommand.rs @@ -131,8 +131,34 @@ fn gen_augment( match &*kind { Kind::ExternalSubcommand => { - quote_spanned! { kind.span()=> - let app = app.setting(clap::AppSettings::AllowExternalSubcommands); + let ty = match variant.fields { + Unnamed(ref fields) if fields.unnamed.len() == 1 => &fields.unnamed[0].ty, + + _ => abort!( + variant, + "The enum variant marked with `external_subcommand` must be \ + a single-typed tuple, and the type must be either `Vec` \ + or `Vec`." + ), + }; + match subty_if_name(ty, "Vec") { + Some(subty) => { + if is_simple_ty(subty, "OsString") { + quote_spanned! { kind.span()=> + let app = app.setting(clap::AppSettings::AllowExternalSubcommands).setting(clap::AppSettings::AllowInvalidUtf8ForExternalSubcommands); + } + } else { + quote_spanned! { kind.span()=> + let app = app.setting(clap::AppSettings::AllowExternalSubcommands); + } + } + } + + None => abort!( + ty.span(), + "The type must be either `Vec` or `Vec` \ + to be used with `external_subcommand`." + ), } } @@ -348,7 +374,7 @@ fn gen_from_arg_matches( _ => abort!( variant, - "The enum variant marked with `external_attribute` must be \ + "The enum variant marked with `external_subcommand` must be \ a single-typed tuple, and the type must be either `Vec` \ or `Vec`." ), diff --git a/clap_derive/tests/utf8.rs b/clap_derive/tests/utf8.rs new file mode 100644 index 000000000000..083e3e6d86e1 --- /dev/null +++ b/clap_derive/tests/utf8.rs @@ -0,0 +1,226 @@ +#![cfg(not(windows))] + +use clap::{Clap, ErrorKind}; +use std::ffi::OsString; +use std::os::unix::ffi::OsStringExt; + +#[derive(Clap, Debug, PartialEq, Eq)] +struct Positional { + arg: String, +} + +#[derive(Clap, Debug, PartialEq, Eq)] +struct Named { + #[clap(short, long)] + arg: String, +} + +#[test] +fn invalid_utf8_strict_positional() { + let m = Positional::try_parse_from(vec![OsString::from(""), OsString::from_vec(vec![0xe9])]); + assert!(m.is_err()); + assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); +} + +#[test] +fn invalid_utf8_strict_option_short_space() { + let m = Named::try_parse_from(vec![ + OsString::from(""), + OsString::from("-a"), + OsString::from_vec(vec![0xe9]), + ]); + assert!(m.is_err()); + assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); +} + +#[test] +fn invalid_utf8_strict_option_short_equals() { + let m = Named::try_parse_from(vec![ + OsString::from(""), + OsString::from_vec(vec![0x2d, 0x61, 0x3d, 0xe9]), + ]); + assert!(m.is_err()); + assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); +} + +#[test] +fn invalid_utf8_strict_option_short_no_space() { + let m = Named::try_parse_from(vec![ + OsString::from(""), + OsString::from_vec(vec![0x2d, 0x61, 0xe9]), + ]); + assert!(m.is_err()); + assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); +} + +#[test] +fn invalid_utf8_strict_option_long_space() { + let m = Named::try_parse_from(vec![ + OsString::from(""), + OsString::from("--arg"), + OsString::from_vec(vec![0xe9]), + ]); + assert!(m.is_err()); + assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); +} + +#[test] +fn invalid_utf8_strict_option_long_equals() { + let m = Named::try_parse_from(vec![ + OsString::from(""), + OsString::from_vec(vec![0x2d, 0x2d, 0x61, 0x72, 0x67, 0x3d, 0xe9]), + ]); + assert!(m.is_err()); + assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); +} + +#[derive(Clap, Debug, PartialEq, Eq)] +struct PositionalOs { + #[clap(parse(from_os_str))] + arg: OsString, +} + +#[derive(Clap, Debug, PartialEq, Eq)] +struct NamedOs { + #[clap(short, long, parse(from_os_str))] + arg: OsString, +} + +#[test] +fn invalid_utf8_positional() { + let r = PositionalOs::try_parse_from(vec![OsString::from(""), OsString::from_vec(vec![0xe9])]); + assert_eq!( + r.unwrap(), + PositionalOs { + arg: OsString::from_vec(vec![0xe9]) + } + ); +} + +#[test] +fn invalid_utf8_option_short_space() { + let r = NamedOs::try_parse_from(vec![ + OsString::from(""), + OsString::from("-a"), + OsString::from_vec(vec![0xe9]), + ]); + assert_eq!( + r.unwrap(), + NamedOs { + arg: OsString::from_vec(vec![0xe9]) + } + ); +} + +#[test] +fn invalid_utf8_option_short_equals() { + let r = NamedOs::try_parse_from(vec![ + OsString::from(""), + OsString::from_vec(vec![0x2d, 0x61, 0x3d, 0xe9]), + ]); + assert_eq!( + r.unwrap(), + NamedOs { + arg: OsString::from_vec(vec![0xe9]) + } + ); +} + +#[test] +fn invalid_utf8_option_short_no_space() { + let r = NamedOs::try_parse_from(vec![ + OsString::from(""), + OsString::from_vec(vec![0x2d, 0x61, 0xe9]), + ]); + assert_eq!( + r.unwrap(), + NamedOs { + arg: OsString::from_vec(vec![0xe9]) + } + ); +} + +#[test] +fn invalid_utf8_option_long_space() { + let r = NamedOs::try_parse_from(vec![ + OsString::from(""), + OsString::from("--arg"), + OsString::from_vec(vec![0xe9]), + ]); + assert_eq!( + r.unwrap(), + NamedOs { + arg: OsString::from_vec(vec![0xe9]) + } + ); +} + +#[test] +fn invalid_utf8_option_long_equals() { + let r = NamedOs::try_parse_from(vec![ + OsString::from(""), + OsString::from_vec(vec![0x2d, 0x2d, 0x61, 0x72, 0x67, 0x3d, 0xe9]), + ]); + assert_eq!( + r.unwrap(), + NamedOs { + arg: OsString::from_vec(vec![0xe9]) + } + ); +} + +#[derive(Debug, PartialEq, Clap)] +enum External { + #[clap(external_subcommand)] + Other(Vec), +} + +#[test] +fn refuse_invalid_utf8_subcommand_with_allow_external_subcommands() { + let m = External::try_parse_from(vec![ + OsString::from(""), + OsString::from_vec(vec![0xe9]), + OsString::from("normal"), + ]); + assert!(m.is_err()); + assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); +} + +#[test] +fn refuse_invalid_utf8_subcommand_args_with_allow_external_subcommands() { + let m = External::try_parse_from(vec![ + OsString::from(""), + OsString::from("subcommand"), + OsString::from("normal"), + OsString::from_vec(vec![0xe9]), + OsString::from("--another_normal"), + ]); + assert!(m.is_err()); + assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); +} + +#[derive(Debug, PartialEq, Clap)] +enum ExternalOs { + #[clap(external_subcommand)] + Other(Vec), +} + +#[test] +fn allow_invalid_utf8_subcommand_args_with_allow_external_subcommands() { + let m = ExternalOs::try_parse_from(vec![ + OsString::from(""), + OsString::from("subcommand"), + OsString::from("normal"), + OsString::from_vec(vec![0xe9]), + OsString::from("--another_normal"), + ]); + assert_eq!( + m.unwrap(), + ExternalOs::Other(vec![ + OsString::from("subcommand"), + OsString::from("normal"), + OsString::from_vec(vec![0xe9]), + OsString::from("--another_normal"), + ]) + ); +} diff --git a/src/build/app/debug_asserts.rs b/src/build/app/debug_asserts.rs index 2c5a94062f3f..57f38d7edffb 100644 --- a/src/build/app/debug_asserts.rs +++ b/src/build/app/debug_asserts.rs @@ -260,6 +260,7 @@ pub(crate) fn assert_app(app: &App) { detect_duplicate_flags(&short_flags, "short"); app._panic_on_missing_help(app.g_settings.is_set(AppSettings::HelpRequired)); + assert_app_flags(app); } fn detect_duplicate_flags(flags: &[Flag], short_or_long: &str) { @@ -301,3 +302,27 @@ fn find_duplicates(slice: &[T]) -> impl Iterator } }) } + +fn assert_app_flags(app: &App) { + use AppSettings::*; + + macro_rules! checker { + ($a:ident requires $($b:ident)|+) => { + if app.is_set($a) { + let mut s = String::new(); + + $( + if !app.is_set($b) { + s.push_str(&format!("\nAppSettings::{} is required when AppSettings::{} is set.\n", std::stringify!($b), std::stringify!($a))); + } + )+ + + if !s.is_empty() { + panic!("{}", s) + } + } + } + } + + checker!(AllowInvalidUtf8ForExternalSubcommands requires AllowExternalSubcommands); +} diff --git a/src/build/app/settings.rs b/src/build/app/settings.rs index 1d788637e54f..ee6f3aeb956c 100644 --- a/src/build/app/settings.rs +++ b/src/build/app/settings.rs @@ -21,37 +21,36 @@ bitflags! { const TRAILING_VARARG = 1 << 12; const NO_BIN_NAME = 1 << 13; const ALLOW_UNK_SC = 1 << 14; - const UTF8_STRICT = 1 << 15; - const UTF8_NONE = 1 << 16; - const LEADING_HYPHEN = 1 << 17; - const NO_POS_VALUES = 1 << 18; - const NEXT_LINE_HELP = 1 << 19; - const DERIVE_DISP_ORDER = 1 << 20; - const COLORED_HELP = 1 << 21; - const COLOR_ALWAYS = 1 << 22; - const COLOR_AUTO = 1 << 23; - const COLOR_NEVER = 1 << 24; - const DONT_DELIM_TRAIL = 1 << 25; - const ALLOW_NEG_NUMS = 1 << 26; - const DISABLE_HELP_SC = 1 << 28; - const DONT_COLLAPSE_ARGS = 1 << 29; - const ARGS_NEGATE_SCS = 1 << 30; - const PROPAGATE_VALS_DOWN = 1 << 31; - const ALLOW_MISSING_POS = 1 << 32; - const TRAILING_VALUES = 1 << 33; - const BUILT = 1 << 34; - const BIN_NAME_BUILT = 1 << 35; - const VALID_ARG_FOUND = 1 << 36; - const INFER_SUBCOMMANDS = 1 << 37; - const CONTAINS_LAST = 1 << 38; - const ARGS_OVERRIDE_SELF = 1 << 39; - const HELP_REQUIRED = 1 << 40; - const SUBCOMMAND_PRECEDENCE_OVER_ARG = 1 << 41; - const DISABLE_HELP_FLAG = 1 << 42; - const USE_LONG_FORMAT_FOR_HELP_SC = 1 << 43; - const DISABLE_ENV = 1 << 44; - const INFER_LONG_ARGS = 1 << 45; - const IGNORE_ERRORS = 1 << 46; + const SC_UTF8_NONE = 1 << 15; + const LEADING_HYPHEN = 1 << 16; + const NO_POS_VALUES = 1 << 17; + const NEXT_LINE_HELP = 1 << 18; + const DERIVE_DISP_ORDER = 1 << 19; + const COLORED_HELP = 1 << 20; + const COLOR_ALWAYS = 1 << 21; + const COLOR_AUTO = 1 << 22; + const COLOR_NEVER = 1 << 23; + const DONT_DELIM_TRAIL = 1 << 24; + const ALLOW_NEG_NUMS = 1 << 25; + const DISABLE_HELP_SC = 1 << 27; + const DONT_COLLAPSE_ARGS = 1 << 28; + const ARGS_NEGATE_SCS = 1 << 29; + const PROPAGATE_VALS_DOWN = 1 << 30; + const ALLOW_MISSING_POS = 1 << 31; + const TRAILING_VALUES = 1 << 32; + const BUILT = 1 << 33; + const BIN_NAME_BUILT = 1 << 34; + const VALID_ARG_FOUND = 1 << 35; + const INFER_SUBCOMMANDS = 1 << 36; + const CONTAINS_LAST = 1 << 37; + const ARGS_OVERRIDE_SELF = 1 << 38; + const HELP_REQUIRED = 1 << 39; + const SUBCOMMAND_PRECEDENCE_OVER_ARG = 1 << 40; + const DISABLE_HELP_FLAG = 1 << 41; + const USE_LONG_FORMAT_FOR_HELP_SC = 1 << 42; + const DISABLE_ENV = 1 << 43; + const INFER_LONG_ARGS = 1 << 44; + const IGNORE_ERRORS = 1 << 45; } } @@ -67,7 +66,7 @@ impl BitOr for AppFlags { impl Default for AppFlags { fn default() -> Self { - AppFlags(Flags::UTF8_NONE | Flags::COLOR_AUTO) + AppFlags(Flags::COLOR_AUTO) } } @@ -80,8 +79,8 @@ impl_settings! { AppSettings, AppFlags, => Flags::ARGS_NEGATE_SCS, AllowExternalSubcommands("allowexternalsubcommands") => Flags::ALLOW_UNK_SC, - AllowInvalidUtf8("allowinvalidutf8") - => Flags::UTF8_NONE, + AllowInvalidUtf8ForExternalSubcommands("allowinvalidutf8externalsubcommands") + => Flags::SC_UTF8_NONE, AllowLeadingHyphen("allowleadinghyphen") => Flags::LEADING_HYPHEN, AllowNegativeNumbers("allownegativenumbers") @@ -124,8 +123,6 @@ impl_settings! { AppSettings, AppFlags, => Flags::NO_AUTO_VERSION, NoBinaryName("nobinaryname") => Flags::NO_BIN_NAME, - StrictUtf8("strictutf8") - => Flags::UTF8_STRICT, SubcommandsNegateReqs("subcommandsnegatereqs") => Flags::SC_NEGATE_REQS, SubcommandRequired("subcommandrequired") @@ -166,16 +163,13 @@ impl_settings! { AppSettings, AppFlags, /// [`App`]: crate::App #[derive(Debug, PartialEq, Copy, Clone)] pub enum AppSettings { - /// Specifies that any invalid UTF-8 code points should *not* be treated as an error. - /// This is the default behavior of `clap`. + /// Specifies that external subcommands that are invalid UTF-8 should *not* be treated as an error. /// - /// **NOTE:** Using argument values with invalid UTF-8 code points requires using - /// [`ArgMatches::value_of_os`], [`ArgMatches::values_of_os`], [`ArgMatches::value_of_lossy`], - /// or [`ArgMatches::values_of_lossy`] for those particular arguments which may contain invalid - /// UTF-8 values + /// **NOTE:** Using external subcommand argument values with invalid UTF-8 requires using + /// [`ArgMatches::values_of_os`] or [`ArgMatches::values_of_lossy`] for those particular + /// arguments which may contain invalid UTF-8 values /// - /// **NOTE:** This rule only applies to argument values. Flags, options, and - /// [`subcommands`] themselves only allow valid UTF-8 code points. + /// **NOTE:** Setting this requires [`AppSettings::AllowExternalSubcommands`] /// /// # Platform Specific /// @@ -186,29 +180,30 @@ pub enum AppSettings { #[cfg_attr(not(unix), doc = " ```ignore")] #[cfg_attr(unix, doc = " ```")] /// # use clap::{App, AppSettings}; - /// use std::ffi::OsString; - /// use std::os::unix::ffi::{OsStrExt,OsStringExt}; - /// - /// let r = App::new("myprog") - /// //.setting(AppSettings::AllowInvalidUtf8) - /// .arg(" 'some positional arg'") - /// .try_get_matches_from( - /// vec![ - /// OsString::from("myprog"), - /// OsString::from_vec(vec![0xe9])]); - /// - /// assert!(r.is_ok()); - /// let m = r.unwrap(); - /// assert_eq!(m.value_of_os("arg").unwrap().as_bytes(), &[0xe9]); + /// // Assume there is an external subcommand named "subcmd" + /// let m = App::new("myprog") + /// .setting(AppSettings::AllowInvalidUtf8ForExternalSubcommands) + /// .setting(AppSettings::AllowExternalSubcommands) + /// .get_matches_from(vec![ + /// "myprog", "subcmd", "--option", "value", "-fff", "--flag" + /// ]); + /// + /// // All trailing arguments will be stored under the subcommand's sub-matches using an empty + /// // string argument name + /// match m.subcommand() { + /// Some((external, ext_m)) => { + /// let ext_args: Vec<&std::ffi::OsStr> = ext_m.values_of_os("").unwrap().collect(); + /// assert_eq!(external, "subcmd"); + /// assert_eq!(ext_args, ["--option", "value", "-fff", "--flag"]); + /// }, + /// _ => {}, + /// } /// ``` /// - /// [`ArgMatches::value_of_os`]: crate::ArgMatches::value_of_os() /// [`ArgMatches::values_of_os`]: crate::ArgMatches::values_of_os() - /// [`ArgMatches::value_of_lossy`]: crate::ArgMatches::value_of_lossy() /// [`ArgMatches::values_of_lossy`]: crate::ArgMatches::values_of_lossy() /// [`subcommands`]: crate::App::subcommand() - // TODO: Either this or StrictUtf8 - AllowInvalidUtf8, + AllowInvalidUtf8ForExternalSubcommands, /// Specifies that leading hyphens are allowed in all argument *values*, such as negative numbers /// like `-10`. (which would otherwise be parsed as another flag or option) @@ -976,40 +971,6 @@ pub enum AppSettings { /// [long format]: crate::App::long_about UseLongFormatForHelpSubcommand, - /// Specifies that any invalid UTF-8 code points should be treated as an error and fail - /// with a [`ErrorKind::InvalidUtf8`] error. - /// - /// **NOTE:** This rule only applies to argument values; Things such as flags, options, and - /// [`subcommands`] themselves only allow valid UTF-8 code points. - /// - /// # Platform Specific - /// - /// Non Windows systems only - /// - /// # Examples - /// - #[cfg_attr(not(unix), doc = " ```ignore")] - #[cfg_attr(unix, doc = " ```")] - /// # use clap::{App, AppSettings, ErrorKind}; - /// use std::ffi::OsString; - /// use std::os::unix::ffi::OsStringExt; - /// - /// let m = App::new("myprog") - /// .setting(AppSettings::StrictUtf8) - /// .arg(" 'some positional arg'") - /// .try_get_matches_from( - /// vec![ - /// OsString::from("myprog"), - /// OsString::from_vec(vec![0xe9])]); - /// - /// assert!(m.is_err()); - /// assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); - /// ``` - /// - /// [`subcommands`]: crate::App::subcommand() - /// [`ErrorKind::InvalidUtf8`]: crate::ErrorKind::InvalidUtf8 - StrictUtf8, - /// Allows specifying that if no [`subcommand`] is present at runtime, /// error and exit gracefully. /// @@ -1134,8 +1095,10 @@ mod test { AppSettings::AllowExternalSubcommands ); assert_eq!( - "allowinvalidutf8".parse::().unwrap(), - AppSettings::AllowInvalidUtf8 + "allowinvalidutf8externalsubcommands" + .parse::() + .unwrap(), + AppSettings::AllowInvalidUtf8ForExternalSubcommands ); assert_eq!( "allowleadinghyphen".parse::().unwrap(), @@ -1223,10 +1186,6 @@ mod test { .unwrap(), AppSettings::UseLongFormatForHelpSubcommand ); - assert_eq!( - "strictutf8".parse::().unwrap(), - AppSettings::StrictUtf8 - ); assert_eq!( "trailingvararg".parse::().unwrap(), AppSettings::TrailingVarArg diff --git a/src/build/arg/debug_asserts.rs b/src/build/arg/debug_asserts.rs index 1deab32a82ea..cc5903d30306 100644 --- a/src/build/arg/debug_asserts.rs +++ b/src/build/arg/debug_asserts.rs @@ -52,10 +52,10 @@ pub(crate) fn assert_arg(arg: &Arg) { ); } - assert_app_flags(arg); + assert_arg_flags(arg); } -fn assert_app_flags(arg: &Arg) { +fn assert_arg_flags(arg: &Arg) { use ArgSettings::*; macro_rules! checker { diff --git a/src/build/arg/mod.rs b/src/build/arg/mod.rs index 54f6f96bad70..8770be12ad7f 100644 --- a/src/build/arg/mod.rs +++ b/src/build/arg/mod.rs @@ -2071,6 +2071,46 @@ impl<'help> Arg<'help> { self.takes_value(true).multiple_values(true) } + /// Specifies that option values that are invalid UTF-8 should *not* be treated as an error. + /// + /// **NOTE:** Using argument values with invalid UTF-8 code points requires using + /// [`ArgMatches::value_of_os`], [`ArgMatches::values_of_os`], [`ArgMatches::value_of_lossy`], + /// or [`ArgMatches::values_of_lossy`] for those particular arguments which may contain invalid + /// UTF-8 values. + /// + /// **NOTE:** Setting this requires [`ArgSettings::TakesValue`] + /// + /// # Examples + /// + #[cfg_attr(not(unix), doc = " ```ignore")] + #[cfg_attr(unix, doc = " ```rust")] + /// # use clap::{App, Arg}; + /// use std::ffi::OsString; + /// use std::os::unix::ffi::{OsStrExt,OsStringExt}; + /// let r = App::new("myprog") + /// .arg(Arg::new("arg").allow_invalid_utf8(true)) + /// .try_get_matches_from(vec![ + /// OsString::from("myprog"), + /// OsString::from_vec(vec![0xe9]) + /// ]); + /// + /// assert!(r.is_ok()); + /// let m = r.unwrap(); + /// assert_eq!(m.value_of_os("arg").unwrap().as_bytes(), &[0xe9]); + /// ``` + /// [`ArgMatches::value_of_os`]: crate::ArgMatches::value_of_os() + /// [`ArgMatches::values_of_os`]: crate::ArgMatches::values_of_os() + /// [`ArgMatches::value_of_lossy`]: crate::ArgMatches::value_of_lossy() + /// [`ArgMatches::values_of_lossy`]: crate::ArgMatches::values_of_lossy() + #[inline] + pub fn allow_invalid_utf8(self, tv: bool) -> Self { + if tv { + self.setting(ArgSettings::AllowInvalidUtf8) + } else { + self.unset_setting(ArgSettings::AllowInvalidUtf8) + } + } + /// Allows one to perform a custom validation on the argument value. You provide a closure /// which accepts a [`String`] value, and return a [`Result`] where the [`Err(String)`] is a /// message displayed to the user. diff --git a/src/build/arg/settings.rs b/src/build/arg/settings.rs index 7143aa51aff5..7e347b397775 100644 --- a/src/build/arg/settings.rs +++ b/src/build/arg/settings.rs @@ -28,6 +28,7 @@ bitflags! { const HIDDEN_LONG_H = 1 << 19; const MULTIPLE_VALS = 1 << 20; const HIDE_ENV = 1 << 21; + const UTF8_NONE = 1 << 22; } } @@ -55,7 +56,8 @@ impl_settings! { ArgSettings, ArgFlags, HideEnvValues("hideenvvalues") => Flags::HIDE_ENV_VALS, HideDefaultValue("hidedefaultvalue") => Flags::HIDE_DEFAULT_VAL, HiddenShortHelp("hiddenshorthelp") => Flags::HIDDEN_SHORT_H, - HiddenLongHelp("hiddenlonghelp") => Flags::HIDDEN_LONG_H + HiddenLongHelp("hiddenlonghelp") => Flags::HIDDEN_LONG_H, + AllowInvalidUtf8("allowinvalidutf8") => Flags::UTF8_NONE } impl Default for ArgFlags { @@ -115,6 +117,9 @@ pub enum ArgSettings { HiddenShortHelp, /// The argument should **not** be shown in long help text HiddenLongHelp, + /// Specifies that option values that are invalid UTF-8 should *not* be treated as an error. + AllowInvalidUtf8, + #[doc(hidden)] RequiredUnlessAll, } @@ -194,6 +199,10 @@ mod test { "hiddenlonghelp".parse::().unwrap(), ArgSettings::HiddenLongHelp ); + assert_eq!( + "allowinvalidutf8".parse::().unwrap(), + ArgSettings::AllowInvalidUtf8 + ); assert!("hahahaha".parse::().is_err()); } } diff --git a/src/parse/matches/arg_matches.rs b/src/parse/matches/arg_matches.rs index 1d0ef6355170..23c84b8d3daa 100644 --- a/src/parse/matches/arg_matches.rs +++ b/src/parse/matches/arg_matches.rs @@ -101,7 +101,8 @@ impl ArgMatches { /// /// # Panics /// - /// This method will [`panic!`] if the value contains invalid UTF-8 code points. + /// This method will [`panic!`] if the value is invalid UTF-8. See + /// [`ArgSettings::AllowInvalidUtf8`][crate::ArgSettings::AllowInvalidUtf8]. /// /// # Examples /// @@ -129,9 +130,11 @@ impl ArgMatches { } /// Gets the lossy value of a specific argument. If the argument wasn't present at runtime - /// it returns `None`. A lossy value is one which contains invalid UTF-8 code points, those + /// it returns `None`. A lossy value is one which contains invalid UTF-8, those /// invalid points will be replaced with `\u{FFFD}` /// + /// *NOTE:* Recommend having set [`ArgSettings::AllowInvalidUtf8`][crate::ArgSettings::AllowInvalidUtf8]. + /// /// *NOTE:* If getting a value for an option or positional argument that allows multiples, /// prefer [`Arg::values_of_lossy`] as `value_of_lossy()` will only return the *first* value. /// @@ -171,6 +174,8 @@ impl ArgMatches { /// Rust are guaranteed to be valid UTF-8, a valid filename on a Unix system as an argument /// value may contain invalid UTF-8 code points. /// + /// *NOTE:* Recommend having set [`ArgSettings::AllowInvalidUtf8`][crate::ArgSettings::AllowInvalidUtf8]. + /// /// *NOTE:* If getting a value for an option or positional argument that allows multiples, /// prefer [`ArgMatches::values_of_os`] as `Arg::value_of_os` will only return the *first* /// value. @@ -208,7 +213,8 @@ impl ArgMatches { /// /// # Panics /// - /// This method will panic if any of the values contain invalid UTF-8 code points. + /// This method will [`panic!`] if the value is invalid UTF-8. See + /// [`ArgSettings::AllowInvalidUtf8`][crate::ArgSettings::AllowInvalidUtf8]. /// /// # Examples /// @@ -260,6 +266,8 @@ impl ArgMatches { /// it returns `None`. A lossy value is one where if it contains invalid UTF-8 code points, /// those invalid points will be replaced with `\u{FFFD}` /// + /// *NOTE:* Recommend having set [`ArgSettings::AllowInvalidUtf8`][crate::ArgSettings::AllowInvalidUtf8]. + /// /// # Examples /// #[cfg_attr(not(unix), doc = " ```ignore")] @@ -294,6 +302,8 @@ impl ArgMatches { /// valid UTF-8 code points. Since [`String`]s in Rust are guaranteed to be valid UTF-8, a valid /// filename as an argument value on Linux (for example) may contain invalid UTF-8 code points. /// + /// *NOTE:* Recommend having set [`ArgSettings::AllowInvalidUtf8`][crate::ArgSettings::AllowInvalidUtf8]. + /// /// # Examples /// #[cfg_attr(not(unix), doc = " ```ignore")] @@ -340,7 +350,8 @@ impl ArgMatches { /// /// # Panics /// - /// This method will [`panic!`] if the value contains invalid UTF-8 code points. + /// This method will [`panic!`] if the value is invalid UTF-8. See + /// [`ArgSettings::AllowInvalidUtf8`][crate::ArgSettings::AllowInvalidUtf8]. /// /// # Examples /// @@ -395,7 +406,8 @@ impl ArgMatches { /// /// # Panics /// - /// This method will [`panic!`] if the value contains invalid UTF-8 code points. + /// This method will [`panic!`] if the value is invalid UTF-8. See + /// [`ArgSettings::AllowInvalidUtf8`][crate::ArgSettings::AllowInvalidUtf8]. /// /// # Examples /// @@ -432,7 +444,8 @@ impl ArgMatches { /// /// # Panics /// - /// This method will [`panic!`] if any of the values contains invalid UTF-8 code points. + /// This method will [`panic!`] if the value is invalid UTF-8. See + /// [`ArgSettings::AllowInvalidUtf8`][crate::ArgSettings::AllowInvalidUtf8]. /// /// # Examples /// @@ -485,7 +498,8 @@ impl ArgMatches { /// /// # Panics /// - /// This method will [`panic!`] if any of the values contains invalid UTF-8 code points. + /// This method will [`panic!`] if the value is invalid UTF-8. See + /// [`ArgSettings::AllowInvalidUtf8`][crate::ArgSettings::AllowInvalidUtf8]. /// /// # Examples /// diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 53b705363a76..42cb19c30225 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -688,13 +688,10 @@ impl<'help, 'app> Parser<'help, 'app> { let sc_name = match arg_os.to_str() { Some(s) => s.to_string(), None => { - if self.is_set(AS::StrictUtf8) { - return Err(ClapError::invalid_utf8( - Usage::new(self).create_usage_with_title(&[]), - self.app.color(), - )); - } - arg_os.to_string_lossy().into_owned() + return Err(ClapError::invalid_utf8( + Usage::new(self).create_usage_with_title(&[]), + self.app.color(), + )); } }; @@ -702,7 +699,9 @@ impl<'help, 'app> Parser<'help, 'app> { let mut sc_m = ArgMatcher::default(); while let Some((v, _)) = it.next() { - if self.is_set(AS::StrictUtf8) && v.to_str().is_none() { + if !self.is_set(AS::AllowInvalidUtf8ForExternalSubcommands) + && v.to_str().is_none() + { return Err(ClapError::invalid_utf8( Usage::new(self).create_usage_with_title(&[]), self.app.color(), diff --git a/src/parse/validator.rs b/src/parse/validator.rs index d2cae08777ca..6bd650ed13c3 100644 --- a/src/parse/validator.rs +++ b/src/parse/validator.rs @@ -84,7 +84,7 @@ impl<'help, 'app, 'parser> Validator<'help, 'app, 'parser> { ) -> ClapResult<()> { debug!("Validator::validate_arg_values: arg={:?}", arg.name); for val in ma.vals_flatten() { - if self.p.is_set(AS::StrictUtf8) && val.to_str().is_none() { + if !arg.is_set(ArgSettings::AllowInvalidUtf8) && val.to_str().is_none() { debug!( "Validator::validate_arg_values: invalid UTF-8 found in val {:?}", val diff --git a/tests/app_settings.rs b/tests/app_settings.rs index 9e953f108dab..15867eb9ab13 100644 --- a/tests/app_settings.rs +++ b/tests/app_settings.rs @@ -595,13 +595,9 @@ fn unset_setting() { #[test] fn unset_settings() { let m = App::new("unset_settings"); - assert!(&m.is_set(AppSettings::AllowInvalidUtf8)); assert!(&m.is_set(AppSettings::ColorAuto)); - let m = m - .unset_global_setting(AppSettings::AllowInvalidUtf8) - .unset_global_setting(AppSettings::ColorAuto); - assert!(!m.is_set(AppSettings::AllowInvalidUtf8), "{:#?}", m); + let m = m.unset_global_setting(AppSettings::ColorAuto); assert!(!m.is_set(AppSettings::ColorAuto), "{:#?}", m); } diff --git a/tests/utf8.rs b/tests/utf8.rs index 474ce172bec4..95cfd9dca1fa 100644 --- a/tests/utf8.rs +++ b/tests/utf8.rs @@ -7,8 +7,7 @@ use std::os::unix::ffi::OsStringExt; #[test] fn invalid_utf8_strict_positional() { let m = App::new("bad_utf8") - .arg(Arg::from(" 'some arg'")) - .setting(AppSettings::StrictUtf8) + .arg(Arg::new("arg")) .try_get_matches_from(vec![OsString::from(""), OsString::from_vec(vec![0xe9])]); assert!(m.is_err()); assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); @@ -17,8 +16,7 @@ fn invalid_utf8_strict_positional() { #[test] fn invalid_utf8_strict_option_short_space() { let m = App::new("bad_utf8") - .arg(Arg::from("-a, --arg 'some arg'")) - .setting(AppSettings::StrictUtf8) + .arg(Arg::new("arg").short('a').long("arg").takes_value(true)) .try_get_matches_from(vec![ OsString::from(""), OsString::from("-a"), @@ -31,8 +29,7 @@ fn invalid_utf8_strict_option_short_space() { #[test] fn invalid_utf8_strict_option_short_equals() { let m = App::new("bad_utf8") - .arg(Arg::from("-a, --arg 'some arg'")) - .setting(AppSettings::StrictUtf8) + .arg(Arg::new("arg").short('a').long("arg").takes_value(true)) .try_get_matches_from(vec![ OsString::from(""), OsString::from_vec(vec![0x2d, 0x61, 0x3d, 0xe9]), @@ -44,8 +41,7 @@ fn invalid_utf8_strict_option_short_equals() { #[test] fn invalid_utf8_strict_option_short_no_space() { let m = App::new("bad_utf8") - .arg(Arg::from("-a, --arg 'some arg'")) - .setting(AppSettings::StrictUtf8) + .arg(Arg::new("arg").short('a').long("arg").takes_value(true)) .try_get_matches_from(vec![ OsString::from(""), OsString::from_vec(vec![0x2d, 0x61, 0xe9]), @@ -57,8 +53,7 @@ fn invalid_utf8_strict_option_short_no_space() { #[test] fn invalid_utf8_strict_option_long_space() { let m = App::new("bad_utf8") - .arg(Arg::from("-a, --arg 'some arg'")) - .setting(AppSettings::StrictUtf8) + .arg(Arg::new("arg").short('a').long("arg").takes_value(true)) .try_get_matches_from(vec![ OsString::from(""), OsString::from("--arg"), @@ -71,8 +66,7 @@ fn invalid_utf8_strict_option_long_space() { #[test] fn invalid_utf8_strict_option_long_equals() { let m = App::new("bad_utf8") - .arg(Arg::from("-a, --arg 'some arg'")) - .setting(AppSettings::StrictUtf8) + .arg(Arg::new("arg").short('a').long("arg").takes_value(true)) .try_get_matches_from(vec![ OsString::from(""), OsString::from_vec(vec![0x2d, 0x2d, 0x61, 0x72, 0x67, 0x3d, 0xe9]), @@ -84,7 +78,7 @@ fn invalid_utf8_strict_option_long_equals() { #[test] fn invalid_utf8_lossy_positional() { let r = App::new("bad_utf8") - .arg(Arg::from(" 'some arg'")) + .arg(Arg::new("arg").allow_invalid_utf8(true)) .try_get_matches_from(vec![OsString::from(""), OsString::from_vec(vec![0xe9])]); assert!(r.is_ok()); let m = r.unwrap(); @@ -95,7 +89,13 @@ fn invalid_utf8_lossy_positional() { #[test] fn invalid_utf8_lossy_option_short_space() { let r = App::new("bad_utf8") - .arg(Arg::from("-a, --arg 'some arg'")) + .arg( + Arg::new("arg") + .short('a') + .long("arg") + .takes_value(true) + .allow_invalid_utf8(true), + ) .try_get_matches_from(vec![ OsString::from(""), OsString::from("-a"), @@ -110,7 +110,13 @@ fn invalid_utf8_lossy_option_short_space() { #[test] fn invalid_utf8_lossy_option_short_equals() { let r = App::new("bad_utf8") - .arg(Arg::from("-a, --arg 'some arg'")) + .arg( + Arg::new("arg") + .short('a') + .long("arg") + .takes_value(true) + .allow_invalid_utf8(true), + ) .try_get_matches_from(vec![ OsString::from(""), OsString::from_vec(vec![0x2d, 0x61, 0x3d, 0xe9]), @@ -124,7 +130,13 @@ fn invalid_utf8_lossy_option_short_equals() { #[test] fn invalid_utf8_lossy_option_short_no_space() { let r = App::new("bad_utf8") - .arg(Arg::from("-a, --arg 'some arg'")) + .arg( + Arg::new("arg") + .short('a') + .long("arg") + .takes_value(true) + .allow_invalid_utf8(true), + ) .try_get_matches_from(vec![ OsString::from(""), OsString::from_vec(vec![0x2d, 0x61, 0xe9]), @@ -138,7 +150,13 @@ fn invalid_utf8_lossy_option_short_no_space() { #[test] fn invalid_utf8_lossy_option_long_space() { let r = App::new("bad_utf8") - .arg(Arg::from("-a, --arg 'some arg'")) + .arg( + Arg::new("arg") + .short('a') + .long("arg") + .takes_value(true) + .allow_invalid_utf8(true), + ) .try_get_matches_from(vec![ OsString::from(""), OsString::from("--arg"), @@ -153,7 +171,13 @@ fn invalid_utf8_lossy_option_long_space() { #[test] fn invalid_utf8_lossy_option_long_equals() { let r = App::new("bad_utf8") - .arg(Arg::from("-a, --arg 'some arg'")) + .arg( + Arg::new("arg") + .short('a') + .long("arg") + .takes_value(true) + .allow_invalid_utf8(true), + ) .try_get_matches_from(vec![ OsString::from(""), OsString::from_vec(vec![0x2d, 0x2d, 0x61, 0x72, 0x67, 0x3d, 0xe9]), @@ -167,7 +191,7 @@ fn invalid_utf8_lossy_option_long_equals() { #[test] fn invalid_utf8_positional() { let r = App::new("bad_utf8") - .arg(Arg::from(" 'some arg'")) + .arg(Arg::new("arg").allow_invalid_utf8(true)) .try_get_matches_from(vec![OsString::from(""), OsString::from_vec(vec![0xe9])]); assert!(r.is_ok()); let m = r.unwrap(); @@ -181,7 +205,13 @@ fn invalid_utf8_positional() { #[test] fn invalid_utf8_option_short_space() { let r = App::new("bad_utf8") - .arg(Arg::from("-a, --arg 'some arg'")) + .arg( + Arg::new("arg") + .short('a') + .long("arg") + .takes_value(true) + .allow_invalid_utf8(true), + ) .try_get_matches_from(vec![ OsString::from(""), OsString::from("-a"), @@ -199,7 +229,13 @@ fn invalid_utf8_option_short_space() { #[test] fn invalid_utf8_option_short_equals() { let r = App::new("bad_utf8") - .arg(Arg::from("-a, --arg 'some arg'")) + .arg( + Arg::new("arg") + .short('a') + .long("arg") + .takes_value(true) + .allow_invalid_utf8(true), + ) .try_get_matches_from(vec![ OsString::from(""), OsString::from_vec(vec![0x2d, 0x61, 0x3d, 0xe9]), @@ -216,7 +252,13 @@ fn invalid_utf8_option_short_equals() { #[test] fn invalid_utf8_option_short_no_space() { let r = App::new("bad_utf8") - .arg(Arg::from("-a, --arg 'some arg'")) + .arg( + Arg::new("arg") + .short('a') + .long("arg") + .takes_value(true) + .allow_invalid_utf8(true), + ) .try_get_matches_from(vec![ OsString::from(""), OsString::from_vec(vec![0x2d, 0x61, 0xe9]), @@ -233,7 +275,13 @@ fn invalid_utf8_option_short_no_space() { #[test] fn invalid_utf8_option_long_space() { let r = App::new("bad_utf8") - .arg(Arg::from("-a, --arg 'some arg'")) + .arg( + Arg::new("arg") + .short('a') + .long("arg") + .takes_value(true) + .allow_invalid_utf8(true), + ) .try_get_matches_from(vec![ OsString::from(""), OsString::from("--arg"), @@ -251,7 +299,13 @@ fn invalid_utf8_option_long_space() { #[test] fn invalid_utf8_option_long_equals() { let r = App::new("bad_utf8") - .arg(Arg::from("-a, --arg 'some arg'")) + .arg( + Arg::new("arg") + .short('a') + .long("arg") + .takes_value(true) + .allow_invalid_utf8(true), + ) .try_get_matches_from(vec![ OsString::from(""), OsString::from_vec(vec![0x2d, 0x2d, 0x61, 0x72, 0x67, 0x3d, 0xe9]), @@ -268,7 +322,6 @@ fn invalid_utf8_option_long_equals() { #[test] fn refuse_invalid_utf8_subcommand_with_allow_external_subcommands() { let m = App::new("bad_utf8") - .setting(AppSettings::StrictUtf8) .setting(AppSettings::AllowExternalSubcommands) .try_get_matches_from(vec![ OsString::from(""), @@ -279,29 +332,9 @@ fn refuse_invalid_utf8_subcommand_with_allow_external_subcommands() { assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); } -#[test] -fn allow_invalid_utf8_subcommand_with_allow_external_subcommands() { - let m = App::new("bad_utf8") - .setting(AppSettings::AllowExternalSubcommands) - .try_get_matches_from(vec![ - OsString::from(""), - OsString::from_vec(vec![0xe9]), - OsString::from("normal"), - ]); - assert!(m.is_ok()); - let m = m.unwrap(); - let (_subcommand, args) = m.subcommand().unwrap(); - let args = args.values_of_os("").unwrap().collect::>(); - assert_eq!(args, vec![OsString::from("normal"),]); - // TODO: currently subcommand can only be &str, which means following code - // will panic. - // assert_eq!(_subcommand, OsString::from_vec(vec![0xe9])); -} - #[test] fn refuse_invalid_utf8_subcommand_args_with_allow_external_subcommands() { let m = App::new("bad_utf8") - .setting(AppSettings::StrictUtf8) .setting(AppSettings::AllowExternalSubcommands) .try_get_matches_from(vec![ OsString::from(""), @@ -318,6 +351,7 @@ fn refuse_invalid_utf8_subcommand_args_with_allow_external_subcommands() { fn allow_invalid_utf8_subcommand_args_with_allow_external_subcommands() { let m = App::new("bad_utf8") .setting(AppSettings::AllowExternalSubcommands) + .setting(AppSettings::AllowInvalidUtf8ForExternalSubcommands) .try_get_matches_from(vec![ OsString::from(""), OsString::from("subcommand"),