Skip to content
This repository has been archived by the owner on Jan 1, 2022. It is now read-only.

fix: Detect when AllowInvalidUtf8 is needed #41

Merged
merged 4 commits into from
Nov 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
29 changes: 19 additions & 10 deletions src/parse/arg_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,20 @@ impl ArgMatcher {
self.0.args.iter()
}

pub(crate) fn inc_occurrence_of(&mut self, arg: &Id, ci: bool) {
debug!("ArgMatcher::inc_occurrence_of: arg={:?}", arg);
let ma = self.entry(arg).or_insert(MatchedArg::new());
pub(crate) fn inc_occurrence_of_arg(&mut self, arg: &Arg) {
let id = &arg.id;
debug!("ArgMatcher::inc_occurrence_of_arg: id={:?}", id);
let ma = self.entry(id).or_insert(MatchedArg::new());
ma.set_ty(ValueType::CommandLine);
ma.set_ignore_case(arg.is_set(ArgSettings::IgnoreCase));
ma.invalid_utf8_allowed(arg.is_set(ArgSettings::AllowInvalidUtf8));
ma.occurs += 1;
}

pub(crate) fn inc_occurrence_of_group(&mut self, id: &Id) {
debug!("ArgMatcher::inc_occurrence_of_group: id={:?}", id);
let ma = self.entry(id).or_insert(MatchedArg::new());
ma.set_ty(ValueType::CommandLine);
ma.set_case_insensitive(ci);
ma.occurs += 1;
}

Expand All @@ -142,7 +151,7 @@ impl ArgMatcher {
}
}

pub(crate) fn push_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) {
fn push_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) {
// We will manually inc occurrences later(for flexibility under
// specific circumstances, like only add one occurrence for flag
// when we met: `--flag=one,two`).
Expand All @@ -151,15 +160,15 @@ impl ArgMatcher {
ma.push_val(val);
}

pub(crate) fn new_val_group(&mut self, arg: &Id) {
fn append_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) {
let ma = self.entry(arg).or_default();
ma.new_val_group();
ma.set_ty(ty);
ma.append_val(val);
}

pub(crate) fn append_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) {
pub(crate) fn new_val_group(&mut self, arg: &Id) {
let ma = self.entry(arg).or_default();
ma.set_ty(ty);
ma.append_val(val);
ma.new_val_group();
}

pub(crate) fn add_index_to(&mut self, arg: &Id, idx: usize, ty: ValueType) {
Expand Down
164 changes: 86 additions & 78 deletions src/parse/matches/arg_matches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,12 +160,10 @@ impl ArgMatches {
/// [`default_value`]: crate::Arg::default_value()
/// [`occurrences_of`]: crate::ArgMatches::occurrences_of()
pub fn value_of<T: Key>(&self, id: T) -> Option<&str> {
if let Some(arg) = self.get_arg(&Id::from(id)) {
if let Some(v) = arg.first() {
return Some(v.to_str().expect(INVALID_UTF8));
}
}
None
let arg = self.get_arg(&Id::from(id))?;
assert_utf8_validation(arg);
let v = arg.first()?;
Some(v.to_str().expect(INVALID_UTF8))
}

/// Gets the lossy value of a specific option or positional argument.
Expand Down Expand Up @@ -210,12 +208,10 @@ impl ArgMatches {
/// [`occurrences_of`]: ArgMatches::occurrences_of()
/// [`Arg::values_of_lossy`]: ArgMatches::values_of_lossy()
pub fn value_of_lossy<T: Key>(&self, id: T) -> Option<Cow<'_, str>> {
if let Some(arg) = self.get_arg(&Id::from(id)) {
if let Some(v) = arg.first() {
return Some(v.to_string_lossy());
}
}
None
let arg = self.get_arg(&Id::from(id))?;
assert_no_utf8_validation(arg);
let v = arg.first()?;
Some(v.to_string_lossy())
}

/// Get the `OsStr` value of a specific option or positional argument.
Expand Down Expand Up @@ -261,8 +257,10 @@ impl ArgMatches {
/// [`occurrences_of`]: ArgMatches::occurrences_of()
/// [`ArgMatches::values_of_os`]: ArgMatches::values_of_os()
pub fn value_of_os<T: Key>(&self, id: T) -> Option<&OsStr> {
self.get_arg(&Id::from(id))
.and_then(|arg| arg.first().map(OsString::as_os_str))
let arg = self.get_arg(&Id::from(id))?;
assert_no_utf8_validation(arg);
let v = arg.first()?;
Some(v.as_os_str())
}

/// Get an [`Iterator`] over [values] of a specific option or positional argument.
Expand Down Expand Up @@ -296,33 +294,28 @@ impl ArgMatches {
/// [values]: Values
/// [`Iterator`]: std::iter::Iterator
pub fn values_of<T: Key>(&self, id: T) -> Option<Values> {
self.get_arg(&Id::from(id)).map(|arg| {
fn to_str_slice(o: &OsString) -> &str {
o.to_str().expect(INVALID_UTF8)
}

Values {
iter: arg.vals_flatten().map(to_str_slice),
}
})
let arg = self.get_arg(&Id::from(id))?;
assert_utf8_validation(arg);
fn to_str_slice(o: &OsString) -> &str {
o.to_str().expect(INVALID_UTF8)
}
let v = Values {
iter: arg.vals_flatten().map(to_str_slice),
};
Some(v)
}

/// Placeholder documentation.
#[cfg(feature = "unstable-grouped")]
pub fn grouped_values_of<T: Key>(&self, id: T) -> Option<GroupedValues> {
#[allow(clippy::type_complexity)]
let arg_values: for<'a> fn(
&'a MatchedArg,
) -> Map<
Iter<'a, Vec<OsString>>,
fn(&Vec<OsString>) -> Vec<&str>,
> = |arg| {
arg.vals()
.map(|g| g.iter().map(|x| x.to_str().expect(INVALID_UTF8)).collect())
let arg = self.get_arg(&Id::from(id))?;
assert_utf8_validation(arg);
let v = GroupedValues {
iter: arg
.vals()
.map(|g| g.iter().map(|x| x.to_str().expect(INVALID_UTF8)).collect()),
};
self.get_arg(&Id::from(id))
.map(arg_values)
.map(|iter| GroupedValues { iter })
Some(v)
}

/// Get the lossy values of a specific option or positional argument.
Expand Down Expand Up @@ -362,11 +355,13 @@ impl ArgMatches {
/// assert_eq!(itr.next(), None);
/// ```
pub fn values_of_lossy<T: Key>(&self, id: T) -> Option<Vec<String>> {
self.get_arg(&Id::from(id)).map(|arg| {
arg.vals_flatten()
.map(|v| v.to_string_lossy().into_owned())
.collect()
})
let arg = self.get_arg(&Id::from(id))?;
assert_no_utf8_validation(arg);
let v = arg
.vals_flatten()
.map(|v| v.to_string_lossy().into_owned())
.collect();
Some(v)
}

/// Get an [`Iterator`] over [`OsStr`] [values] of a specific option or positional argument.
Expand Down Expand Up @@ -412,13 +407,15 @@ impl ArgMatches {
/// [values]: OsValues
/// [`String`]: std::string::String
pub fn values_of_os<T: Key>(&self, id: T) -> Option<OsValues> {
let arg = self.get_arg(&Id::from(id))?;
assert_no_utf8_validation(arg);
fn to_str_slice(o: &OsString) -> &OsStr {
o
}

self.get_arg(&Id::from(id)).map(|arg| OsValues {
let v = OsValues {
iter: arg.vals_flatten().map(to_str_slice),
})
};
Some(v)
}

/// Parse the value (with [`FromStr`]) of a specific option or positional argument.
Expand Down Expand Up @@ -463,18 +460,17 @@ impl ArgMatches {
R: FromStr,
<R as FromStr>::Err: Display,
{
if let Some(v) = self.value_of(name) {
v.parse::<R>().map_err(|e| {
let message = format!(
"The argument '{}' isn't a valid value for '{}': {}",
v, name, e
);

Error::value_validation_without_app(name.to_string(), v.to_string(), message.into())
})
} else {
Err(Error::argument_not_found_auto(name.to_string()))
}
let v = self
.value_of(name)
.ok_or_else(|| Error::argument_not_found_auto(name.to_string()))?;
v.parse::<R>().map_err(|e| {
let message = format!(
"The argument '{}' isn't a valid value for '{}': {}",
v, name, e
);

Error::value_validation_without_app(name.to_string(), v.to_string(), message.into())
})
}

/// Parse the value (with [`FromStr`]) of a specific option or positional argument.
Expand Down Expand Up @@ -554,22 +550,17 @@ impl ArgMatches {
R: FromStr,
<R as FromStr>::Err: Display,
{
if let Some(vals) = self.values_of(name) {
vals.map(|v| {
v.parse::<R>().map_err(|e| {
let message = format!("The argument '{}' isn't a valid value: {}", v, e);

Error::value_validation_without_app(
name.to_string(),
v.to_string(),
message.into(),
)
})
let v = self
.values_of(name)
.ok_or_else(|| Error::argument_not_found_auto(name.to_string()))?;
v.map(|v| {
v.parse::<R>().map_err(|e| {
let message = format!("The argument '{}' isn't a valid value: {}", v, e);

Error::value_validation_without_app(name.to_string(), v.to_string(), message.into())
})
.collect()
} else {
Err(Error::argument_not_found_auto(name.to_string()))
}
})
.collect()
}

/// Parse the values (with [`FromStr`]) of a specific option or positional argument.
Expand Down Expand Up @@ -825,12 +816,9 @@ impl ArgMatches {
/// ```
/// [delimiter]: crate::Arg::value_delimiter()
pub fn index_of<T: Key>(&self, id: T) -> Option<usize> {
if let Some(arg) = self.get_arg(&Id::from(id)) {
if let Some(i) = arg.get_index(0) {
return Some(i);
}
}
None
let arg = self.get_arg(&Id::from(id))?;
let i = arg.get_index(0)?;
Some(i)
}

/// All indices an argument appeared at when parsing.
Expand Down Expand Up @@ -910,9 +898,11 @@ impl ArgMatches {
/// [`ArgMatches::index_of`]: ArgMatches::index_of()
/// [delimiter]: Arg::value_delimiter()
pub fn indices_of<T: Key>(&self, id: T) -> Option<Indices<'_>> {
self.get_arg(&Id::from(id)).map(|arg| Indices {
let arg = self.get_arg(&Id::from(id))?;
let i = Indices {
iter: arg.indices(),
})
};
Some(i)
}

/// The `ArgMatches` for the current [subcommand].
Expand Down Expand Up @@ -1247,6 +1237,24 @@ impl<'a> Default for Indices<'a> {
}
}

#[track_caller]
#[inline]
fn assert_utf8_validation(arg: &MatchedArg) {
debug_assert!(
matches!(arg.is_invalid_utf8_allowed(), None | Some(false)),
"Must use `_os` lookups with `Arg::allow_invalid_utf8`"
);
}

#[track_caller]
#[inline]
fn assert_no_utf8_validation(arg: &MatchedArg) {
debug_assert!(
matches!(arg.is_invalid_utf8_allowed(), None | Some(true)),
"Must use `Arg::allow_invalid_utf8` with `_os` lookups"
);
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
20 changes: 15 additions & 5 deletions src/parse/matches/matched_arg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ pub(crate) struct MatchedArg {
pub(crate) ty: ValueType,
indices: Vec<usize>,
vals: Vec<Vec<OsString>>,
case_insensitive: bool,
ignore_case: bool,
invalid_utf8_allowed: Option<bool>,
}

impl Default for MatchedArg {
Expand All @@ -40,7 +41,8 @@ impl MatchedArg {
ty: ValueType::Unknown,
indices: Vec::new(),
vals: Vec::new(),
case_insensitive: false,
ignore_case: false,
invalid_utf8_allowed: None,
}
}

Expand Down Expand Up @@ -127,7 +129,7 @@ impl MatchedArg {

pub(crate) fn contains_val(&self, val: &str) -> bool {
self.vals_flatten().any(|v| {
if self.case_insensitive {
if self.ignore_case {
// If `v` isn't utf8, it can't match `val`, so `OsStr::to_str` should be fine
v.to_str().map_or(false, |v| eq_ignore_case(v, val))
} else {
Expand All @@ -140,8 +142,16 @@ impl MatchedArg {
self.ty = ty;
}

pub(crate) fn set_case_insensitive(&mut self, ci: bool) {
self.case_insensitive = ci;
pub(crate) fn set_ignore_case(&mut self, yes: bool) {
self.ignore_case = yes;
}

pub(crate) fn invalid_utf8_allowed(&mut self, yes: bool) {
self.invalid_utf8_allowed = Some(yes);
}

pub(crate) fn is_invalid_utf8_allowed(&self) -> Option<bool> {
self.invalid_utf8_allowed
}
}

Expand Down
13 changes: 8 additions & 5 deletions src/parse/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -706,9 +706,9 @@ impl<'help, 'app> Parser<'help, 'app> {
let mut sc_m = ArgMatcher::new(self.app);

while let Some((v, _)) = it.next() {
if !self.is_set(AS::AllowInvalidUtf8ForExternalSubcommands)
&& v.to_str().is_none()
{
let allow_invalid_utf8 =
self.is_set(AS::AllowInvalidUtf8ForExternalSubcommands);
if !allow_invalid_utf8 && v.to_str().is_none() {
return Err(ClapError::invalid_utf8(
self.app,
Usage::new(self).create_usage_with_title(&[]),
Expand All @@ -720,6 +720,9 @@ impl<'help, 'app> Parser<'help, 'app> {
ValueType::CommandLine,
false,
);
sc_m.get_mut(&Id::empty_hash())
.expect("just inserted")
.invalid_utf8_allowed(allow_invalid_utf8);
}

matcher.subcommand(SubCommand {
Expand Down Expand Up @@ -1803,10 +1806,10 @@ impl<'help, 'app> Parser<'help, 'app> {

/// Increase occurrence of specific argument and the grouped arg it's in.
fn inc_occurrence_of_arg(&self, matcher: &mut ArgMatcher, arg: &Arg<'help>) {
matcher.inc_occurrence_of(&arg.id, arg.is_set(ArgSettings::IgnoreCase));
matcher.inc_occurrence_of_arg(arg);
// Increment or create the group "args"
for group in self.app.groups_for_arg(&arg.id) {
matcher.inc_occurrence_of(&group, false);
matcher.inc_occurrence_of_group(&group);
}
}
}
Expand Down