Skip to content
This repository has been archived by the owner on Jan 1, 2022. It is now read-only.

Commit

Permalink
Merge pull request #41 from epage/utf8
Browse files Browse the repository at this point in the history
fix: Detect when AllowInvalidUtf8 is needed
  • Loading branch information
epage committed Nov 29, 2021
2 parents c7aa471 + b8c34c3 commit e1db873
Show file tree
Hide file tree
Showing 7 changed files with 226 additions and 99 deletions.
29 changes: 19 additions & 10 deletions src/parse/arg_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,20 @@ impl ArgMatcher {
self.0.args.iter()
}

pub(crate) fn inc_occurrence_of(&mut self, arg: &Id, ci: bool) {
debug!("ArgMatcher::inc_occurrence_of: arg={:?}", arg);
let ma = self.entry(arg).or_insert(MatchedArg::new());
pub(crate) fn inc_occurrence_of_arg(&mut self, arg: &Arg) {
let id = &arg.id;
debug!("ArgMatcher::inc_occurrence_of_arg: id={:?}", id);
let ma = self.entry(id).or_insert(MatchedArg::new());
ma.set_ty(ValueType::CommandLine);
ma.set_ignore_case(arg.is_set(ArgSettings::IgnoreCase));
ma.invalid_utf8_allowed(arg.is_set(ArgSettings::AllowInvalidUtf8));
ma.occurs += 1;
}

pub(crate) fn inc_occurrence_of_group(&mut self, id: &Id) {
debug!("ArgMatcher::inc_occurrence_of_group: id={:?}", id);
let ma = self.entry(id).or_insert(MatchedArg::new());
ma.set_ty(ValueType::CommandLine);
ma.set_case_insensitive(ci);
ma.occurs += 1;
}

Expand All @@ -142,7 +151,7 @@ impl ArgMatcher {
}
}

pub(crate) fn push_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) {
fn push_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) {
// We will manually inc occurrences later(for flexibility under
// specific circumstances, like only add one occurrence for flag
// when we met: `--flag=one,two`).
Expand All @@ -151,15 +160,15 @@ impl ArgMatcher {
ma.push_val(val);
}

pub(crate) fn new_val_group(&mut self, arg: &Id) {
fn append_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) {
let ma = self.entry(arg).or_default();
ma.new_val_group();
ma.set_ty(ty);
ma.append_val(val);
}

pub(crate) fn append_val_to(&mut self, arg: &Id, val: OsString, ty: ValueType) {
pub(crate) fn new_val_group(&mut self, arg: &Id) {
let ma = self.entry(arg).or_default();
ma.set_ty(ty);
ma.append_val(val);
ma.new_val_group();
}

pub(crate) fn add_index_to(&mut self, arg: &Id, idx: usize, ty: ValueType) {
Expand Down
164 changes: 86 additions & 78 deletions src/parse/matches/arg_matches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,12 +160,10 @@ impl ArgMatches {
/// [`default_value`]: crate::Arg::default_value()
/// [`occurrences_of`]: crate::ArgMatches::occurrences_of()
pub fn value_of<T: Key>(&self, id: T) -> Option<&str> {
if let Some(arg) = self.get_arg(&Id::from(id)) {
if let Some(v) = arg.first() {
return Some(v.to_str().expect(INVALID_UTF8));
}
}
None
let arg = self.get_arg(&Id::from(id))?;
assert_utf8_validation(arg);
let v = arg.first()?;
Some(v.to_str().expect(INVALID_UTF8))
}

/// Gets the lossy value of a specific option or positional argument.
Expand Down Expand Up @@ -210,12 +208,10 @@ impl ArgMatches {
/// [`occurrences_of`]: ArgMatches::occurrences_of()
/// [`Arg::values_of_lossy`]: ArgMatches::values_of_lossy()
pub fn value_of_lossy<T: Key>(&self, id: T) -> Option<Cow<'_, str>> {
if let Some(arg) = self.get_arg(&Id::from(id)) {
if let Some(v) = arg.first() {
return Some(v.to_string_lossy());
}
}
None
let arg = self.get_arg(&Id::from(id))?;
assert_no_utf8_validation(arg);
let v = arg.first()?;
Some(v.to_string_lossy())
}

/// Get the `OsStr` value of a specific option or positional argument.
Expand Down Expand Up @@ -261,8 +257,10 @@ impl ArgMatches {
/// [`occurrences_of`]: ArgMatches::occurrences_of()
/// [`ArgMatches::values_of_os`]: ArgMatches::values_of_os()
pub fn value_of_os<T: Key>(&self, id: T) -> Option<&OsStr> {
self.get_arg(&Id::from(id))
.and_then(|arg| arg.first().map(OsString::as_os_str))
let arg = self.get_arg(&Id::from(id))?;
assert_no_utf8_validation(arg);
let v = arg.first()?;
Some(v.as_os_str())
}

/// Get an [`Iterator`] over [values] of a specific option or positional argument.
Expand Down Expand Up @@ -296,33 +294,28 @@ impl ArgMatches {
/// [values]: Values
/// [`Iterator`]: std::iter::Iterator
pub fn values_of<T: Key>(&self, id: T) -> Option<Values> {
self.get_arg(&Id::from(id)).map(|arg| {
fn to_str_slice(o: &OsString) -> &str {
o.to_str().expect(INVALID_UTF8)
}

Values {
iter: arg.vals_flatten().map(to_str_slice),
}
})
let arg = self.get_arg(&Id::from(id))?;
assert_utf8_validation(arg);
fn to_str_slice(o: &OsString) -> &str {
o.to_str().expect(INVALID_UTF8)
}
let v = Values {
iter: arg.vals_flatten().map(to_str_slice),
};
Some(v)
}

/// Placeholder documentation.
#[cfg(feature = "unstable-grouped")]
pub fn grouped_values_of<T: Key>(&self, id: T) -> Option<GroupedValues> {
#[allow(clippy::type_complexity)]
let arg_values: for<'a> fn(
&'a MatchedArg,
) -> Map<
Iter<'a, Vec<OsString>>,
fn(&Vec<OsString>) -> Vec<&str>,
> = |arg| {
arg.vals()
.map(|g| g.iter().map(|x| x.to_str().expect(INVALID_UTF8)).collect())
let arg = self.get_arg(&Id::from(id))?;
assert_utf8_validation(arg);
let v = GroupedValues {
iter: arg
.vals()
.map(|g| g.iter().map(|x| x.to_str().expect(INVALID_UTF8)).collect()),
};
self.get_arg(&Id::from(id))
.map(arg_values)
.map(|iter| GroupedValues { iter })
Some(v)
}

/// Get the lossy values of a specific option or positional argument.
Expand Down Expand Up @@ -362,11 +355,13 @@ impl ArgMatches {
/// assert_eq!(itr.next(), None);
/// ```
pub fn values_of_lossy<T: Key>(&self, id: T) -> Option<Vec<String>> {
self.get_arg(&Id::from(id)).map(|arg| {
arg.vals_flatten()
.map(|v| v.to_string_lossy().into_owned())
.collect()
})
let arg = self.get_arg(&Id::from(id))?;
assert_no_utf8_validation(arg);
let v = arg
.vals_flatten()
.map(|v| v.to_string_lossy().into_owned())
.collect();
Some(v)
}

/// Get an [`Iterator`] over [`OsStr`] [values] of a specific option or positional argument.
Expand Down Expand Up @@ -412,13 +407,15 @@ impl ArgMatches {
/// [values]: OsValues
/// [`String`]: std::string::String
pub fn values_of_os<T: Key>(&self, id: T) -> Option<OsValues> {
let arg = self.get_arg(&Id::from(id))?;
assert_no_utf8_validation(arg);
fn to_str_slice(o: &OsString) -> &OsStr {
o
}

self.get_arg(&Id::from(id)).map(|arg| OsValues {
let v = OsValues {
iter: arg.vals_flatten().map(to_str_slice),
})
};
Some(v)
}

/// Parse the value (with [`FromStr`]) of a specific option or positional argument.
Expand Down Expand Up @@ -463,18 +460,17 @@ impl ArgMatches {
R: FromStr,
<R as FromStr>::Err: Display,
{
if let Some(v) = self.value_of(name) {
v.parse::<R>().map_err(|e| {
let message = format!(
"The argument '{}' isn't a valid value for '{}': {}",
v, name, e
);

Error::value_validation_without_app(name.to_string(), v.to_string(), message.into())
})
} else {
Err(Error::argument_not_found_auto(name.to_string()))
}
let v = self
.value_of(name)
.ok_or_else(|| Error::argument_not_found_auto(name.to_string()))?;
v.parse::<R>().map_err(|e| {
let message = format!(
"The argument '{}' isn't a valid value for '{}': {}",
v, name, e
);

Error::value_validation_without_app(name.to_string(), v.to_string(), message.into())
})
}

/// Parse the value (with [`FromStr`]) of a specific option or positional argument.
Expand Down Expand Up @@ -554,22 +550,17 @@ impl ArgMatches {
R: FromStr,
<R as FromStr>::Err: Display,
{
if let Some(vals) = self.values_of(name) {
vals.map(|v| {
v.parse::<R>().map_err(|e| {
let message = format!("The argument '{}' isn't a valid value: {}", v, e);

Error::value_validation_without_app(
name.to_string(),
v.to_string(),
message.into(),
)
})
let v = self
.values_of(name)
.ok_or_else(|| Error::argument_not_found_auto(name.to_string()))?;
v.map(|v| {
v.parse::<R>().map_err(|e| {
let message = format!("The argument '{}' isn't a valid value: {}", v, e);

Error::value_validation_without_app(name.to_string(), v.to_string(), message.into())
})
.collect()
} else {
Err(Error::argument_not_found_auto(name.to_string()))
}
})
.collect()
}

/// Parse the values (with [`FromStr`]) of a specific option or positional argument.
Expand Down Expand Up @@ -825,12 +816,9 @@ impl ArgMatches {
/// ```
/// [delimiter]: crate::Arg::value_delimiter()
pub fn index_of<T: Key>(&self, id: T) -> Option<usize> {
if let Some(arg) = self.get_arg(&Id::from(id)) {
if let Some(i) = arg.get_index(0) {
return Some(i);
}
}
None
let arg = self.get_arg(&Id::from(id))?;
let i = arg.get_index(0)?;
Some(i)
}

/// All indices an argument appeared at when parsing.
Expand Down Expand Up @@ -910,9 +898,11 @@ impl ArgMatches {
/// [`ArgMatches::index_of`]: ArgMatches::index_of()
/// [delimiter]: Arg::value_delimiter()
pub fn indices_of<T: Key>(&self, id: T) -> Option<Indices<'_>> {
self.get_arg(&Id::from(id)).map(|arg| Indices {
let arg = self.get_arg(&Id::from(id))?;
let i = Indices {
iter: arg.indices(),
})
};
Some(i)
}

/// The `ArgMatches` for the current [subcommand].
Expand Down Expand Up @@ -1247,6 +1237,24 @@ impl<'a> Default for Indices<'a> {
}
}

#[track_caller]
#[inline]
fn assert_utf8_validation(arg: &MatchedArg) {
debug_assert!(
matches!(arg.is_invalid_utf8_allowed(), None | Some(false)),
"Must use `_os` lookups with `Arg::allow_invalid_utf8`"
);
}

#[track_caller]
#[inline]
fn assert_no_utf8_validation(arg: &MatchedArg) {
debug_assert!(
matches!(arg.is_invalid_utf8_allowed(), None | Some(true)),
"Must use `Arg::allow_invalid_utf8` with `_os` lookups"
);
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
20 changes: 15 additions & 5 deletions src/parse/matches/matched_arg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ pub(crate) struct MatchedArg {
pub(crate) ty: ValueType,
indices: Vec<usize>,
vals: Vec<Vec<OsString>>,
case_insensitive: bool,
ignore_case: bool,
invalid_utf8_allowed: Option<bool>,
}

impl Default for MatchedArg {
Expand All @@ -40,7 +41,8 @@ impl MatchedArg {
ty: ValueType::Unknown,
indices: Vec::new(),
vals: Vec::new(),
case_insensitive: false,
ignore_case: false,
invalid_utf8_allowed: None,
}
}

Expand Down Expand Up @@ -127,7 +129,7 @@ impl MatchedArg {

pub(crate) fn contains_val(&self, val: &str) -> bool {
self.vals_flatten().any(|v| {
if self.case_insensitive {
if self.ignore_case {
// If `v` isn't utf8, it can't match `val`, so `OsStr::to_str` should be fine
v.to_str().map_or(false, |v| eq_ignore_case(v, val))
} else {
Expand All @@ -140,8 +142,16 @@ impl MatchedArg {
self.ty = ty;
}

pub(crate) fn set_case_insensitive(&mut self, ci: bool) {
self.case_insensitive = ci;
pub(crate) fn set_ignore_case(&mut self, yes: bool) {
self.ignore_case = yes;
}

pub(crate) fn invalid_utf8_allowed(&mut self, yes: bool) {
self.invalid_utf8_allowed = Some(yes);
}

pub(crate) fn is_invalid_utf8_allowed(&self) -> Option<bool> {
self.invalid_utf8_allowed
}
}

Expand Down
13 changes: 8 additions & 5 deletions src/parse/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -706,9 +706,9 @@ impl<'help, 'app> Parser<'help, 'app> {
let mut sc_m = ArgMatcher::new(self.app);

while let Some((v, _)) = it.next() {
if !self.is_set(AS::AllowInvalidUtf8ForExternalSubcommands)
&& v.to_str().is_none()
{
let allow_invalid_utf8 =
self.is_set(AS::AllowInvalidUtf8ForExternalSubcommands);
if !allow_invalid_utf8 && v.to_str().is_none() {
return Err(ClapError::invalid_utf8(
self.app,
Usage::new(self).create_usage_with_title(&[]),
Expand All @@ -720,6 +720,9 @@ impl<'help, 'app> Parser<'help, 'app> {
ValueType::CommandLine,
false,
);
sc_m.get_mut(&Id::empty_hash())
.expect("just inserted")
.invalid_utf8_allowed(allow_invalid_utf8);
}

matcher.subcommand(SubCommand {
Expand Down Expand Up @@ -1803,10 +1806,10 @@ impl<'help, 'app> Parser<'help, 'app> {

/// Increase occurrence of specific argument and the grouped arg it's in.
fn inc_occurrence_of_arg(&self, matcher: &mut ArgMatcher, arg: &Arg<'help>) {
matcher.inc_occurrence_of(&arg.id, arg.is_set(ArgSettings::IgnoreCase));
matcher.inc_occurrence_of_arg(arg);
// Increment or create the group "args"
for group in self.app.groups_for_arg(&arg.id) {
matcher.inc_occurrence_of(&group, false);
matcher.inc_occurrence_of_group(&group);
}
}
}
Expand Down

0 comments on commit e1db873

Please sign in to comment.