Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(parser): Limit recursion to block stackoverflow #404

Merged
merged 1 commit into from Dec 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions crates/toml_edit/Cargo.toml
Expand Up @@ -39,6 +39,12 @@ default = []
easy = ["serde"]
perf = ["dep:kstring"]
serde = ["dep:serde", "toml_datetime/serde"]
# Provide a method disable_recursion_limit to parse arbitrarily deep structures
# without any consideration for overflowing the stack. Additionally you will
# need to be careful around other recursive operations on the parsed result
# which may overflow the stack after deserialization has completed, including,
# but not limited to, Display and Debug and Drop impls.
unbounded = []

[dependencies]
indexmap = "1.9.1"
Expand Down
83 changes: 48 additions & 35 deletions crates/toml_edit/src/parser/array.rs
Expand Up @@ -12,15 +12,19 @@ use crate::parser::prelude::*;
// ;; Array

// array = array-open array-values array-close
pub(crate) fn array(input: Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> {
delimited(
ARRAY_OPEN,
cut(array_values),
cut(ARRAY_CLOSE)
.context(Context::Expression("array"))
.context(Context::Expected(ParserValue::CharLiteral(']'))),
)
.parse(input)
pub(crate) fn array(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> {
move |input| {
delimited(
ARRAY_OPEN,
cut(array_values(check)),
cut(ARRAY_CLOSE)
.context(Context::Expression("array"))
.context(Context::Expected(ParserValue::CharLiteral(']'))),
)
.parse(input)
}
}

// note: we're omitting ws and newlines here, because
Expand All @@ -35,36 +39,45 @@ const ARRAY_SEP: u8 = b',';
// note: this rule is modified
// array-values = [ ( array-value array-sep array-values ) /
// array-value / ws-comment-newline ]
pub(crate) fn array_values(input: Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> {
(
opt(
(separated_list1(ARRAY_SEP, array_value), opt(ARRAY_SEP)).map(
|(v, trailing): (Vec<Value>, Option<u8>)| {
pub(crate) fn array_values(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> {
move |input| {
let check = check.recursing(input)?;
(
opt((
separated_list1(ARRAY_SEP, array_value(check)),
opt(ARRAY_SEP),
)
.map(|(v, trailing): (Vec<Value>, Option<u8>)| {
(
Array::with_vec(v.into_iter().map(Item::Value).collect()),
trailing.is_some(),
)
},
),
),
ws_comment_newline,
)
.map_res::<_, _, std::str::Utf8Error>(|(array, trailing)| {
let (mut array, comma) = array.unwrap_or_default();
array.set_trailing_comma(comma);
array.set_trailing(std::str::from_utf8(trailing)?);
Ok(array)
})
.parse(input)
})),
ws_comment_newline,
)
.map_res::<_, _, std::str::Utf8Error>(|(array, trailing)| {
let (mut array, comma) = array.unwrap_or_default();
array.set_trailing_comma(comma);
array.set_trailing(std::str::from_utf8(trailing)?);
Ok(array)
})
.parse(input)
}
}

pub(crate) fn array_value(input: Input<'_>) -> IResult<Input<'_>, Value, ParserError<'_>> {
(ws_comment_newline, value, ws_comment_newline)
.map_res::<_, _, std::str::Utf8Error>(|(ws1, v, ws2)| {
let v = v.decorated(std::str::from_utf8(ws1)?, std::str::from_utf8(ws2)?);
Ok(v)
})
.parse(input)
pub(crate) fn array_value(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, Value, ParserError<'_>> {
move |input| {
(ws_comment_newline, value(check), ws_comment_newline)
.map_res::<_, _, std::str::Utf8Error>(|(ws1, v, ws2)| {
let v = v.decorated(std::str::from_utf8(ws1)?, std::str::from_utf8(ws2)?);
Ok(v)
})
.parse(input)
}
}

#[cfg(test)]
Expand Down Expand Up @@ -109,13 +122,13 @@ mod test {
r#"[ { x = 1, a = "2" }, {a = "a",b = "b", c = "c"} ]"#,
];
for input in inputs {
let parsed = array.parse(input.as_bytes()).finish();
let parsed = array(Default::default()).parse(input.as_bytes()).finish();
assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned()));
}

let invalid_inputs = [r#"["#, r#"[,]"#, r#"[,2]"#, r#"[1e165,,]"#];
for input in invalid_inputs {
let parsed = array.parse(input.as_bytes()).finish();
let parsed = array(Default::default()).parse(input.as_bytes()).finish();
assert!(parsed.is_err());
}
}
Expand Down
2 changes: 1 addition & 1 deletion crates/toml_edit/src/parser/document.rs
Expand Up @@ -111,7 +111,7 @@ pub(crate) fn parse_keyval(
.context(Context::Expected(ParserValue::CharLiteral('='))),
(
ws,
value,
value(RecursionCheck::default()),
line_trailing
.context(Context::Expected(ParserValue::CharLiteral('\n')))
.context(Context::Expected(ParserValue::CharLiteral('#'))),
Expand Down
3 changes: 3 additions & 0 deletions crates/toml_edit/src/parser/errors.rs
Expand Up @@ -344,6 +344,8 @@ pub(crate) enum CustomError {
actual: &'static str,
},
OutOfRange,
#[cfg_attr(feature = "unbounded", allow(dead_code))]
RecursionLimitExceeded,
}

impl CustomError {
Expand Down Expand Up @@ -394,6 +396,7 @@ impl Display for CustomError {
)
}
CustomError::OutOfRange => writeln!(f, "Value is out of range"),
CustomError::RecursionLimitExceeded => writeln!(f, "Recursion limit exceded"),
}
}
}
92 changes: 54 additions & 38 deletions crates/toml_edit/src/parser/inline_table.rs
Expand Up @@ -17,15 +17,19 @@ use indexmap::map::Entry;
// ;; Inline Table

// inline-table = inline-table-open inline-table-keyvals inline-table-close
pub(crate) fn inline_table(input: Input<'_>) -> IResult<Input<'_>, InlineTable, ParserError<'_>> {
delimited(
INLINE_TABLE_OPEN,
cut(inline_table_keyvals.map_res(|(kv, p)| table_from_pairs(kv, p))),
cut(INLINE_TABLE_CLOSE)
.context(Context::Expression("inline table"))
.context(Context::Expected(ParserValue::CharLiteral('}'))),
)
.parse(input)
pub(crate) fn inline_table(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, InlineTable, ParserError<'_>> {
move |input| {
delimited(
INLINE_TABLE_OPEN,
cut(inline_table_keyvals(check).map_res(|(kv, p)| table_from_pairs(kv, p))),
cut(INLINE_TABLE_CLOSE)
.context(Context::Expression("inline table"))
.context(Context::Expected(ParserValue::CharLiteral('}'))),
)
.parse(input)
}
}

fn table_from_pairs(
Expand Down Expand Up @@ -93,36 +97,44 @@ pub(crate) const KEYVAL_SEP: u8 = b'=';
// ( key keyval-sep val )

fn inline_table_keyvals(
input: Input<'_>,
) -> IResult<Input<'_>, (Vec<(Vec<Key>, TableKeyValue)>, &str), ParserError<'_>> {
(separated_list0(INLINE_TABLE_SEP, keyval), ws).parse(input)
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, (Vec<(Vec<Key>, TableKeyValue)>, &str), ParserError<'_>>
{
move |input| {
let check = check.recursing(input)?;
(separated_list0(INLINE_TABLE_SEP, keyval(check)), ws).parse(input)
}
}

fn keyval(input: Input<'_>) -> IResult<Input<'_>, (Vec<Key>, TableKeyValue), ParserError<'_>> {
(
key,
cut((
one_of(KEYVAL_SEP)
.context(Context::Expected(ParserValue::CharLiteral('.')))
.context(Context::Expected(ParserValue::CharLiteral('='))),
(ws, value, ws),
)),
)
.map(|(key, (_, v))| {
let mut path = key;
let key = path.pop().expect("grammar ensures at least 1");
fn keyval(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, (Vec<Key>, TableKeyValue), ParserError<'_>> {
move |input| {
(
key,
cut((
one_of(KEYVAL_SEP)
.context(Context::Expected(ParserValue::CharLiteral('.')))
.context(Context::Expected(ParserValue::CharLiteral('='))),
(ws, value(check), ws),
)),
)
.map(|(key, (_, v))| {
let mut path = key;
let key = path.pop().expect("grammar ensures at least 1");

let (pre, v, suf) = v;
let v = v.decorated(pre, suf);
(
path,
TableKeyValue {
key,
value: Item::Value(v),
},
)
})
.parse(input)
let (pre, v, suf) = v;
let v = v.decorated(pre, suf);
(
path,
TableKeyValue {
key,
value: Item::Value(v),
},
)
})
.parse(input)
}
}

#[cfg(test)]
Expand All @@ -139,12 +151,16 @@ mod test {
r#"{ hello.world = "a" }"#,
];
for input in inputs {
let parsed = inline_table.parse(input.as_bytes()).finish();
let parsed = inline_table(Default::default())
.parse(input.as_bytes())
.finish();
assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned()));
}
let invalid_inputs = [r#"{a = 1e165"#, r#"{ hello = "world", a = 2, hello = 1}"#];
for input in invalid_inputs {
let parsed = inline_table.parse(input.as_bytes()).finish();
let parsed = inline_table(Default::default())
.parse(input.as_bytes())
.finish();
assert!(parsed.is_err());
}
}
Expand Down
43 changes: 42 additions & 1 deletion crates/toml_edit/src/parser/mod.rs
Expand Up @@ -18,7 +18,7 @@ pub(crate) mod value;

pub use errors::TomlError;

mod prelude {
pub(crate) mod prelude {
pub(crate) use super::errors::Context;
pub(crate) use super::errors::ParserError;
pub(crate) use super::errors::ParserValue;
Expand Down Expand Up @@ -61,4 +61,45 @@ mod prelude {
}
}
}

#[cfg(not(feature = "unbounded"))]
#[derive(Copy, Clone, Debug, Default)]
pub(crate) struct RecursionCheck {
current: usize,
}

#[cfg(not(feature = "unbounded"))]
impl RecursionCheck {
pub(crate) fn recursing(
mut self,
input: Input<'_>,
) -> Result<Self, nom8::Err<ParserError<'_>>> {
self.current += 1;
if self.current < 128 {
Ok(self)
} else {
Err(nom8::Err::Error(
nom8::error::FromExternalError::from_external_error(
input,
nom8::error::ErrorKind::Eof,
super::errors::CustomError::RecursionLimitExceeded,
),
))
}
}
}

#[cfg(feature = "unbounded")]
#[derive(Copy, Clone, Debug, Default)]
pub(crate) struct RecursionCheck {}

#[cfg(feature = "unbounded")]
impl RecursionCheck {
pub(crate) fn recursing(
self,
_input: Input<'_>,
) -> Result<Self, nom8::Err<ParserError<'_>>> {
Ok(self)
}
}
}
14 changes: 9 additions & 5 deletions crates/toml_edit/src/parser/value.rs
Expand Up @@ -15,16 +15,19 @@ use crate::value as v;
use crate::Value;

// val = string / boolean / array / inline-table / date-time / float / integer
pub(crate) fn value(input: Input<'_>) -> IResult<Input<'_>, v::Value, ParserError<'_>> {
dispatch!{peek(any);
pub(crate) fn value(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, v::Value, ParserError<'_>> {
move |input| {
dispatch!{peek(any);
crate::parser::strings::QUOTATION_MARK |
crate::parser::strings::APOSTROPHE => string.map(|s| {
v::Value::String(Formatted::new(
s.into_owned()
))
}),
crate::parser::array::ARRAY_OPEN => array.map(v::Value::Array),
crate::parser::inline_table::INLINE_TABLE_OPEN => inline_table.map(v::Value::InlineTable),
crate::parser::array::ARRAY_OPEN => array(check).map(v::Value::Array),
crate::parser::inline_table::INLINE_TABLE_OPEN => inline_table(check).map(v::Value::InlineTable),
// Date/number starts
b'+' | b'-' | b'0'..=b'9' => {
// Uncommon enough not to be worth optimizing at this time
Expand Down Expand Up @@ -83,6 +86,7 @@ pub(crate) fn value(input: Input<'_>) -> IResult<Input<'_>, v::Value, ParserErro
.with_recognized()
.map_res(|(value, raw)| apply_raw(value, raw))
.parse(input)
}
}

fn apply_raw(mut val: Value, raw: &[u8]) -> Result<Value, std::str::Utf8Error> {
Expand Down Expand Up @@ -137,7 +141,7 @@ trimmed in raw strings.
r#"[ { x = 1, a = "2" }, {a = "a",b = "b", c = "c"} ]"#,
];
for input in inputs {
let parsed = value.parse(input.as_bytes()).finish();
let parsed = value(Default::default()).parse(input.as_bytes()).finish();
assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned()));
}
}
Expand Down
7 changes: 5 additions & 2 deletions crates/toml_edit/src/value.rs
Expand Up @@ -211,10 +211,13 @@ impl FromStr for Value {

/// Parses a value from a &str
fn from_str(s: &str) -> Result<Self, Self::Err> {
use nom8::prelude::*;
use crate::parser::prelude::*;
use nom8::FinishIResult;

let b = s.as_bytes();
let parsed = parser::value::value.parse(b).finish();
let parsed = parser::value::value(RecursionCheck::default())
.parse(b)
.finish();
match parsed {
Ok(mut value) => {
// Only take the repr and not decor, as its probably not intended
Expand Down