From b8c34c3b57806ee3911183ed45a850a062efc134 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 29 Nov 2021 13:16:03 -0600 Subject: [PATCH] fix: Detect when AllowInvalidUtf8 is needed Fixes #36 --- src/parse/arg_matcher.rs | 13 +++-- src/parse/matches/arg_matches.rs | 25 +++++++++ src/parse/matches/matched_arg.rs | 10 ++++ src/parse/parser.rs | 9 ++- tests/app_settings.rs | 2 + tests/default_vals.rs | 2 + tests/utf8.rs | 95 +++++++++++++++++++++++++++++++- 7 files changed, 146 insertions(+), 10 deletions(-) diff --git a/src/parse/arg_matcher.rs b/src/parse/arg_matcher.rs index 538df315..058eaaf4 100644 --- a/src/parse/arg_matcher.rs +++ b/src/parse/arg_matcher.rs @@ -132,6 +132,7 @@ impl ArgMatcher { let ma = self.entry(id).or_insert(MatchedArg::new()); ma.set_ty(ValueType::CommandLine); ma.set_ignore_case(arg.is_set(ArgSettings::IgnoreCase)); + ma.invalid_utf8_allowed(arg.is_set(ArgSettings::AllowInvalidUtf8)); ma.occurs += 1; } @@ -150,7 +151,7 @@ impl ArgMatcher { } } - pub(crate) fn push_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) { + fn push_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) { // We will manually inc occurrences later(for flexibility under // specific circumstances, like only add one occurrence for flag // when we met: `--flag=one,two`). @@ -159,15 +160,15 @@ impl ArgMatcher { ma.push_val(val); } - pub(crate) fn new_val_group(&mut self, arg: &Id) { + fn append_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) { let ma = self.entry(arg).or_default(); - ma.new_val_group(); + ma.set_ty(ty); + ma.append_val(val); } - pub(crate) fn append_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) { + pub(crate) fn new_val_group(&mut self, arg: &Id) { let ma = self.entry(arg).or_default(); - ma.set_ty(ty); - ma.append_val(val); + ma.new_val_group(); } pub(crate) fn add_index_to(&mut self, arg: &Id, idx: usize, ty: ValueType) { diff --git a/src/parse/matches/arg_matches.rs b/src/parse/matches/arg_matches.rs index 65b36705..a7d53737 100644 --- a/src/parse/matches/arg_matches.rs +++ b/src/parse/matches/arg_matches.rs @@ -161,6 +161,7 @@ impl ArgMatches { /// [`occurrences_of`]: crate::ArgMatches::occurrences_of() pub fn value_of(&self, id: T) -> Option<&str> { let arg = self.get_arg(&Id::from(id))?; + assert_utf8_validation(arg); let v = arg.first()?; Some(v.to_str().expect(INVALID_UTF8)) } @@ -208,6 +209,7 @@ impl ArgMatches { /// [`Arg::values_of_lossy`]: ArgMatches::values_of_lossy() pub fn value_of_lossy(&self, id: T) -> Option> { let arg = self.get_arg(&Id::from(id))?; + assert_no_utf8_validation(arg); let v = arg.first()?; Some(v.to_string_lossy()) } @@ -256,6 +258,7 @@ impl ArgMatches { /// [`ArgMatches::values_of_os`]: ArgMatches::values_of_os() pub fn value_of_os(&self, id: T) -> Option<&OsStr> { let arg = self.get_arg(&Id::from(id))?; + assert_no_utf8_validation(arg); let v = arg.first()?; Some(v.as_os_str()) } @@ -292,6 +295,7 @@ impl ArgMatches { /// [`Iterator`]: std::iter::Iterator pub fn values_of(&self, id: T) -> Option { let arg = self.get_arg(&Id::from(id))?; + assert_utf8_validation(arg); fn to_str_slice(o: &OsString) -> &str { o.to_str().expect(INVALID_UTF8) } @@ -305,6 +309,7 @@ impl ArgMatches { #[cfg(feature = "unstable-grouped")] pub fn grouped_values_of(&self, id: T) -> Option { let arg = self.get_arg(&Id::from(id))?; + assert_utf8_validation(arg); let v = GroupedValues { iter: arg .vals() @@ -351,6 +356,7 @@ impl ArgMatches { /// ``` pub fn values_of_lossy(&self, id: T) -> Option> { let arg = self.get_arg(&Id::from(id))?; + assert_no_utf8_validation(arg); let v = arg .vals_flatten() .map(|v| v.to_string_lossy().into_owned()) @@ -402,6 +408,7 @@ impl ArgMatches { /// [`String`]: std::string::String pub fn values_of_os(&self, id: T) -> Option { let arg = self.get_arg(&Id::from(id))?; + assert_no_utf8_validation(arg); fn to_str_slice(o: &OsString) -> &OsStr { o } @@ -1230,6 +1237,24 @@ impl<'a> Default for Indices<'a> { } } +#[track_caller] +#[inline] +fn assert_utf8_validation(arg: &MatchedArg) { + debug_assert!( + matches!(arg.is_invalid_utf8_allowed(), None | Some(false)), + "Must use `_os` lookups with `Arg::allow_invalid_utf8`" + ); +} + +#[track_caller] +#[inline] +fn assert_no_utf8_validation(arg: &MatchedArg) { + debug_assert!( + matches!(arg.is_invalid_utf8_allowed(), None | Some(true)), + "Must use `Arg::allow_invalid_utf8` with `_os` lookups" + ); +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/parse/matches/matched_arg.rs b/src/parse/matches/matched_arg.rs index de82b050..398e9499 100644 --- a/src/parse/matches/matched_arg.rs +++ b/src/parse/matches/matched_arg.rs @@ -25,6 +25,7 @@ pub(crate) struct MatchedArg { indices: Vec, vals: Vec>, ignore_case: bool, + invalid_utf8_allowed: Option, } impl Default for MatchedArg { @@ -41,6 +42,7 @@ impl MatchedArg { indices: Vec::new(), vals: Vec::new(), ignore_case: false, + invalid_utf8_allowed: None, } } @@ -143,6 +145,14 @@ impl MatchedArg { pub(crate) fn set_ignore_case(&mut self, yes: bool) { self.ignore_case = yes; } + + pub(crate) fn invalid_utf8_allowed(&mut self, yes: bool) { + self.invalid_utf8_allowed = Some(yes); + } + + pub(crate) fn is_invalid_utf8_allowed(&self) -> Option { + self.invalid_utf8_allowed + } } #[cfg(test)] diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 1e9209d1..d797da23 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -706,9 +706,9 @@ impl<'help, 'app> Parser<'help, 'app> { let mut sc_m = ArgMatcher::new(self.app); while let Some((v, _)) = it.next() { - if !self.is_set(AS::AllowInvalidUtf8ForExternalSubcommands) - && v.to_str().is_none() - { + let allow_invalid_utf8 = + self.is_set(AS::AllowInvalidUtf8ForExternalSubcommands); + if !allow_invalid_utf8 && v.to_str().is_none() { return Err(ClapError::invalid_utf8( self.app, Usage::new(self).create_usage_with_title(&[]), @@ -720,6 +720,9 @@ impl<'help, 'app> Parser<'help, 'app> { ValueType::CommandLine, false, ); + sc_m.get_mut(&Id::empty_hash()) + .expect("just inserted") + .invalid_utf8_allowed(allow_invalid_utf8); } matcher.subcommand(SubCommand { diff --git a/tests/app_settings.rs b/tests/app_settings.rs index db65025d..7e03807a 100644 --- a/tests/app_settings.rs +++ b/tests/app_settings.rs @@ -884,6 +884,7 @@ fn allow_ext_sc_when_sc_required() { let res = App::new("clap-test") .version("v1.4.8") .setting(AppSettings::AllowExternalSubcommands) + .setting(AppSettings::AllowInvalidUtf8ForExternalSubcommands) .setting(AppSettings::SubcommandRequiredElseHelp) .try_get_matches_from(vec!["clap-test", "external-cmd", "foo"]); @@ -903,6 +904,7 @@ fn external_subcommand_looks_like_built_in() { let res = App::new("cargo") .version("1.26.0") .setting(AppSettings::AllowExternalSubcommands) + .setting(AppSettings::AllowInvalidUtf8ForExternalSubcommands) .subcommand(App::new("install")) .try_get_matches_from(vec!["cargo", "install-update", "foo"]); assert!(res.is_ok()); diff --git a/tests/default_vals.rs b/tests/default_vals.rs index d9cc5d8a..16f9c07c 100644 --- a/tests/default_vals.rs +++ b/tests/default_vals.rs @@ -539,6 +539,7 @@ fn multiple_defaults() { Arg::new("files") .long("files") .number_of_values(2) + .allow_invalid_utf8(true) .default_values(&["old", "new"]), ) .try_get_matches_from(vec![""]); @@ -555,6 +556,7 @@ fn multiple_defaults_override() { Arg::new("files") .long("files") .number_of_values(2) + .allow_invalid_utf8(true) .default_values(&["old", "new"]), ) .try_get_matches_from(vec!["", "--files", "other", "mine"]); diff --git a/tests/utf8.rs b/tests/utf8.rs index 05849d20..e47040b2 100644 --- a/tests/utf8.rs +++ b/tests/utf8.rs @@ -1,6 +1,6 @@ #![cfg(not(windows))] -use clap::{App, AppSettings, Arg, ErrorKind}; +use clap::{arg, App, AppSettings, Arg, ErrorKind}; use std::ffi::OsString; use std::os::unix::ffi::OsStringExt; @@ -387,3 +387,96 @@ fn allow_invalid_utf8_subcommand_args_with_allow_external_subcommands() { ] ); } + +#[test] +fn allow_validated_utf8_value_of() { + let a = App::new("test").arg(arg!(--name )); + let m = a.try_get_matches_from(["test", "--name", "me"]).unwrap(); + let _ = m.value_of("name"); +} + +#[test] +#[should_panic = "Must use `Arg::allow_invalid_utf8` with `_os` lookups"] +fn panic_validated_utf8_value_of_os() { + let a = App::new("test").arg(arg!(--name )); + let m = a.try_get_matches_from(["test", "--name", "me"]).unwrap(); + let _ = m.value_of_os("name"); +} + +#[test] +fn ignore_validated_utf8_with_defaults() { + // For now, we don't check the correct call is used with defaults (due to pain of piping it + // through the code) but we need to make sure we don't erroneously panic. + let a = App::new("test").arg(arg!(--value ).required(false).default_value("foo")); + let m = a.try_get_matches_from(["test"]).unwrap(); + let _ = m.value_of("value"); + let _ = m.value_of_os("value"); +} + +#[test] +fn allow_invalid_utf8_value_of_os() { + let a = App::new("test").arg(arg!(--name ).allow_invalid_utf8(true)); + let m = a.try_get_matches_from(["test", "--name", "me"]).unwrap(); + let _ = m.value_of_os("name"); +} + +#[test] +#[should_panic = "Must use `_os` lookups with `Arg::allow_invalid_utf8`"] +fn panic_invalid_utf8_value_of() { + let a = App::new("test").arg(arg!(--name ).allow_invalid_utf8(true)); + let m = a.try_get_matches_from(["test", "--name", "me"]).unwrap(); + let _ = m.value_of("name"); +} + +#[test] +fn ignore_invalid_utf8_with_defaults() { + // For now, we don't check the correct call is used with defaults (due to pain of piping it + // through the code) but we need to make sure we don't erroneously panic. + let a = App::new("test").arg( + arg!(--value ) + .required(false) + .default_value("foo") + .allow_invalid_utf8(true), + ); + let m = a.try_get_matches_from(["test"]).unwrap(); + let _ = m.value_of("value"); + let _ = m.value_of_os("value"); +} + +#[test] +fn allow_validated_utf8_external_subcommand_values_of() { + let a = App::new("test").setting(AppSettings::AllowExternalSubcommands); + let m = a.try_get_matches_from(vec!["test", "cmd", "arg"]).unwrap(); + let (_ext, args) = m.subcommand().unwrap(); + let _ = args.values_of(""); +} + +#[test] +#[should_panic = "Must use `Arg::allow_invalid_utf8` with `_os` lookups"] +fn panic_validated_utf8_external_subcommand_values_of_os() { + let a = App::new("test").setting(AppSettings::AllowExternalSubcommands); + let m = a.try_get_matches_from(vec!["test", "cmd", "arg"]).unwrap(); + let (_ext, args) = m.subcommand().unwrap(); + let _ = args.values_of_os(""); +} + +#[test] +fn allow_invalid_utf8_external_subcommand_values_of_os() { + let a = App::new("test") + .setting(AppSettings::AllowExternalSubcommands) + .setting(AppSettings::AllowInvalidUtf8ForExternalSubcommands); + let m = a.try_get_matches_from(vec!["test", "cmd", "arg"]).unwrap(); + let (_ext, args) = m.subcommand().unwrap(); + let _ = args.values_of_os(""); +} + +#[test] +#[should_panic = "Must use `_os` lookups with `Arg::allow_invalid_utf8`"] +fn panic_invalid_utf8_external_subcommand_values_of() { + let a = App::new("test") + .setting(AppSettings::AllowExternalSubcommands) + .setting(AppSettings::AllowInvalidUtf8ForExternalSubcommands); + let m = a.try_get_matches_from(vec!["test", "cmd", "arg"]).unwrap(); + let (_ext, args) = m.subcommand().unwrap(); + let _ = args.values_of(""); +}