Skip to content

Commit

Permalink
feat: Add separated_list_m_n
Browse files Browse the repository at this point in the history
This is useful to naïvely represent some grammar productions
(e.g. `IPv6address` from [RFC 3986]):

```abnf
IPv6address /= [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
```

turns to

```diff
 tuple((
-    opt(tuple((many_m_n(0, 2, tuple((h16, char(':')))), h16))),
+    separated_list_m_n(0, 3, char(':'), h16),
     tag("::"),
     many_m_n(2, 2, tuple((h16, char(':')))),
     ls32,
 ))
```

[RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986#appendix-A
  • Loading branch information
Jan Tojnar committed Jan 22, 2024
1 parent e87c7da commit ecd8bb5
Show file tree
Hide file tree
Showing 3 changed files with 225 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/choosing_a_combinator.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Those are used to recognize the lowest level elements of your grammar, like, "he
| [many_m_n](https://docs.rs/nom/latest/nom/multi/fn.many_m_n.html) | `many_m_n(1, 3, tag("ab"))` | `"ababc"` | `Ok(("c", vec!["ab", "ab"]))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec|
| [many_till](https://docs.rs/nom/latest/nom/multi/fn.many_till.html) | `many_till(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!["ab", "ab"], "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second|
| [separated_list0](https://docs.rs/nom/latest/nom/multi/fn.separated_list0.html) | `separated_list0(tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` |`separated_list1` works like `separated_list0` but must return at least one element|
| [separated_list_m_n](https://docs.rs/nom/latest/nom/multi/fn.separated_list_m_n.html) | `separated_list_m_n(2, 3, tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` | Alternately applies the item parser and the separator parser and returns the list of items in a Vec if the number is between m and n (inclusive).|
| [fold_many0](https://docs.rs/nom/latest/nom/multi/fn.fold_many0.html) | `fold_many0(be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([], 6))` |Applies the parser 0 or more times and folds the list of return values. The `fold_many1` version must apply the child parser at least one time|
| [fold_many_m_n](https://docs.rs/nom/latest/nom/multi/fn.fold_many_m_n.html) | `fold_many_m_n(1, 2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return values|
| [length_count](https://docs.rs/nom/latest/nom/multi/fn.length_count.html) | `length_count(number, tag("ab"))` | `"2ababab"` | `Ok(("ab", vec!["ab", "ab"]))` |Gets a number from the first parser, then applies the second parser that many times|
Expand Down
142 changes: 142 additions & 0 deletions src/multi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,148 @@ where
}
}

/// Alternates between an element parser and a separator parser to produce a list of
/// between `min` and `max` elements (both inclusive).
///
/// Parsing stops when either parser returns [`Err::Error`] or the number of elements
/// reaches `max`, and the elements accumulated so far are returned. If fewer than `min`
/// elements were produced at that point, the combinator fails instead. To chain an error
/// up rather than stopping on it, see [`cut`][crate::combinator::cut].
///
/// A separator that is not followed by an element is not consumed: the remaining input
/// starts at that separator.
///
/// # Arguments
/// * `min` The minimum number of elements.
/// * `max` The maximum number of elements.
/// * `separator` Parses the separator between list elements.
/// * `parser` Parses the elements of the list.
///
/// # Errors
/// * If the very first element cannot be parsed and `min` is not zero, the element
///   parser's own error is returned.
/// * If parsing stops after at least one element but before `min` elements were
///   collected, an [`Err::Error`] of kind `ErrorKind::SeparatedList` is returned,
///   positioned after the last successfully parsed element.
/// * If the separator succeeds without consuming any input, an [`Err::Error`] of kind
///   `ErrorKind::SeparatedList` is returned to avoid looping forever.
///
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult, Parser};
/// use nom::multi::separated_list_m_n;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
///   separated_list_m_n(2, 3, tag("|"), tag("abc")).parse(s)
/// }
///
/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"])));
/// assert_eq!(parser("abc|abc|def"), Ok(("|def", vec!["abc", "abc"])));
/// assert_eq!(parser("abc1abc"), Err(Err::Error(Error::new("1abc", ErrorKind::SeparatedList))));
/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
/// assert_eq!(parser("def|abc"), Err(Err::Error(Error::new("def|abc", ErrorKind::Tag))));
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn separated_list_m_n<I, E, F, G>(
  min: usize,
  max: usize,
  separator: G,
  parser: F,
) -> impl Parser<I, Output = Vec<<F as Parser<I>>::Output>, Error = E>
where
  I: Clone + InputLength,
  F: Parser<I, Error = E>,
  G: Parser<I, Error = E>,
  // `core::` rather than `std::`: this item is gated on `alloc` only, so the path has to
  // resolve when the standard library is not linked.
  E: ParseError<I> + core::fmt::Debug,
{
  SeparatedListMN {
    parser,
    separator,
    min,
    max,
  }
}

#[cfg(feature = "alloc")]
/// Parser implementation for the [separated_list_m_n] combinator
///
/// Holds the two sub-parsers together with the inclusive bounds on the number of
/// elements the list may contain.
pub struct SeparatedListMN<F, G> {
  /// Parses one element of the list.
  parser: F,
  /// Parses the separator expected between two consecutive elements.
  separator: G,
  /// Minimum number of elements required for the parse to succeed.
  min: usize,
  /// Maximum number of elements the list may contain.
  max: usize,
}

/// Runs `parser (separator parser)*`, collecting between `min` and `max` elements.
///
/// * Stops as soon as `max` elements have been collected, leaving the rest of the input
///   untouched.
/// * Stops on the first [`Err::Error`] from either sub-parser; the input is then rewound
///   to just after the last complete element (a dangling separator is not consumed).
/// * Any other error (`Failure`, `Incomplete`) from a sub-parser is propagated unchanged.
/// * Fails with `ErrorKind::SeparatedList` if fewer than `min` elements were collected
///   after at least one element, or if the separator matched without consuming input.
#[cfg(feature = "alloc")]
impl<I, E, F, G> Parser<I> for SeparatedListMN<F, G>
where
  I: Clone + InputLength,
  E: ParseError<I>,
  F: Parser<I, Error = E>,
  G: Parser<I, Error = E>,
{
  type Output = Vec<<F as Parser<I>>::Output>;
  type Error = <F as Parser<I>>::Error;

  fn process<OM: OutputMode>(
    &mut self,
    mut i: I,
  ) -> crate::PResult<OM, I, Self::Output, Self::Error> {
    // An empty range can never be satisfied, whatever the input is.
    // NOTE(review): reported as `Failure` so a misconfigured range is not silently
    // backtracked over; confirm this matches the crate's other `*_m_n` combinators.
    if self.min > self.max {
      return Err(Err::Failure(
        <F as Parser<I>>::Error::from_error_kind(i, ErrorKind::SeparatedList),
      ));
    }

    let mut res = OM::Output::bind(crate::lib::std::vec::Vec::new);

    // With `max == 0` the only valid list is the empty one; do not consume anything.
    if self.max == 0 {
      return Ok((i, res));
    }

    // First element: it is not preceded by a separator.
    match self.parser.process::<OM>(i.clone()) {
      Err(Err::Error(e)) => {
        // Only a recoverable error may be turned into an empty list.
        if self.min == 0 {
          return Ok((i, res));
        }
        return Err(Err::Error(e));
      }
      // `Failure` / `Incomplete` must always reach the caller.
      Err(e) => return Err(e),
      Ok((rest, o)) => {
        res = OM::Output::combine(res, o, |mut res, o| {
          res.push(o);
          res
        });
        i = rest;
      }
    }

    // The element count is tracked outside of the `combine` closures: an output mode is
    // free not to invoke them (e.g. when the output is discarded), and the bounds must
    // still be enforced in that case.
    let mut res_len = 1usize;

    while res_len < self.max {
      let len = i.input_len();

      let after_sep = match self.separator.process::<OM>(i.clone()) {
        Err(Err::Error(_)) => break,
        Err(e) => return Err(e),
        Ok((rest, _)) => rest,
      };

      // infinite loop check: the separator must always consume
      if after_sep.input_len() == len {
        return Err(Err::Error(OM::Error::bind(|| {
          <F as Parser<I>>::Error::from_error_kind(i, ErrorKind::SeparatedList)
        })));
      }

      match self.parser.process::<OM>(after_sep) {
        // `i` still points before the separator, so a dangling separator is left
        // unconsumed.
        Err(Err::Error(_)) => break,
        Err(e) => return Err(e),
        Ok((rest, o)) => {
          res = OM::Output::combine(res, o, |mut res, o| {
            res.push(o);
            res
          });
          res_len += 1;
          i = rest;
        }
      }
    }

    // `res_len <= max` holds here, so only the lower bound is left to verify.
    if res_len < self.min {
      return Err(Err::Error(OM::Error::bind(|| {
        <F as Parser<I>>::Error::from_error_kind(i, ErrorKind::SeparatedList)
      })));
    }

    Ok((i, res))
  }
}

/// Repeats the embedded parser `m..=n` times
///
/// This stops before `n` when the parser returns [`Err::Error`] and returns the results that were accumulated. To instead chain an error up, see
Expand Down
83 changes: 82 additions & 1 deletion src/multi/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use crate::{
lib::std::vec::Vec,
multi::{
count, fold, fold_many0, fold_many1, fold_many_m_n, length_count, many, many0, many1, many_m_n,
many_till, separated_list0, separated_list1,
many_till, separated_list0, separated_list1, separated_list_m_n,
},
};

Expand Down Expand Up @@ -103,6 +103,87 @@ fn separated_list1_test() {
assert_eq!(multi(h), Err(Err::Incomplete(Needed::new(1))));
}

#[test]
#[cfg(feature = "alloc")]
fn separated_list_m_n_test() {
  // 2 to 4 "abcd" items separated by ","
  fn comma_list(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
    separated_list_m_n(2, 4, tag(","), tag("abcd")).parse(input)
  }
  // 2 to 4 zero-length items separated by ","
  fn empty_item_list(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
    separated_list_m_n(2, 4, tag(","), tag("")).parse(input)
  }
  // 2 to 4 "abc" items with a zero-length separator
  fn empty_sep_list(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
    separated_list_m_n(2, 4, tag(""), tag("abc")).parse(input)
  }
  // 2 to 4 "abcd" items separated by the two-byte separator ".."
  fn dotdot_list(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
    separated_list_m_n(2, 4, tag(".."), tag("abcd")).parse(input)
  }
  // 0 to 3 "abc" items separated by ","
  fn optional_list(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
    separated_list_m_n(0, 3, tag(","), tag("abc")).parse(input)
  }

  // No item at all while at least two are required: the item parser's error is reported.
  let no_items = &b"azerty"[..];
  assert_eq!(
    comma_list(no_items),
    Err(Err::Error(error_position!(no_items, ErrorKind::Tag)))
  );

  // A single item where two are required: error right after the parsed item.
  let less_items = &b"abcdef"[..];
  assert_eq!(
    comma_list(less_items),
    Err(Err::Error(error_position!(
      &less_items[4..],
      ErrorKind::SeparatedList
    )))
  );

  // Exactly the minimum number of items.
  assert_eq!(
    comma_list(&b"abcd,abcdef"[..]),
    Ok((&b"ef"[..], vec![&b"abcd"[..], &b"abcd"[..]]))
  );

  // Zero-length items are accepted as long as the separator consumes input.
  assert_eq!(
    empty_item_list(&b",,abc"[..]),
    Ok((&b"abc"[..], vec![&b""[..], &b""[..], &b""[..]]))
  );

  // A separator that consumes nothing is rejected.
  let not_separated = &b"abcabc"[..];
  assert_eq!(
    empty_sep_list(not_separated),
    Err(Err::Error(error_position!(
      &not_separated[3..],
      ErrorKind::SeparatedList
    )))
  );

  // A separator not followed by an item is left in the remaining input.
  assert_eq!(
    comma_list(&b"abcd,abcd,ef"[..]),
    Ok((&b",ef"[..], vec![&b"abcd"[..], &b"abcd"[..]]))
  );

  // Incomplete first item.
  assert_eq!(
    comma_list(&b"abc"[..]),
    Err(Err::Incomplete(Needed::new(1)))
  );

  // Incomplete separator.
  assert_eq!(
    dotdot_list(&b"abcd."[..]),
    Err(Err::Incomplete(Needed::new(1)))
  );

  // Incomplete item after a separator.
  assert_eq!(
    comma_list(&b"abcd,abc"[..]),
    Err(Err::Incomplete(Needed::new(1)))
  );

  // With a lower bound of zero, no item at all yields an empty list.
  assert_eq!(optional_list(no_items), Ok((no_items, Vec::new())));
}

#[test]
#[cfg(feature = "alloc")]
fn many0_test() {
Expand Down

0 comments on commit ecd8bb5

Please sign in to comment.