diff --git a/crates/toml_edit/src/document.rs b/crates/toml_edit/src/document.rs index 9a1e6d7e..d2898cde 100644 --- a/crates/toml_edit/src/document.rs +++ b/crates/toml_edit/src/document.rs @@ -70,7 +70,7 @@ impl FromStr for Document { /// Parses a document from a &str fn from_str(s: &str) -> Result { - parser::TomlParser::parse(s.as_bytes()) + parser::document(s.as_bytes()) } } diff --git a/crates/toml_edit/src/parser/array.rs b/crates/toml_edit/src/parser/array.rs index 2c77843c..95604ecc 100644 --- a/crates/toml_edit/src/parser/array.rs +++ b/crates/toml_edit/src/parser/array.rs @@ -1,11 +1,12 @@ -use crate::parser::trivia::ws_comment_newline; -use crate::parser::value::value; -use crate::{Array, Value}; use combine::parser::byte::byte; use combine::parser::range::recognize_with_value; use combine::stream::RangeStream; use combine::*; +use crate::parser::trivia::ws_comment_newline; +use crate::parser::value::value; +use crate::{Array, Value}; + // ;; Array // array = array-open array-values array-close @@ -55,3 +56,58 @@ parse!(array_value() -> Value, { Ok(v) }) }); + +#[cfg(test)] +mod test { + use super::*; + + use combine::stream::position::Stream; + + #[test] + fn arrays() { + let inputs = [ + r#"[]"#, + r#"[ ]"#, + r#"[ + 1, 2, 3 +]"#, + r#"[ + 1, + 2, # this is ok +]"#, + r#"[# comment +# comment2 + + + ]"#, + r#"[# comment +# comment2 + 1 + +#sd +, +# comment3 + + ]"#, + r#"[1]"#, + r#"[1,]"#, + r#"[ "all", 'strings', """are the same""", '''type''']"#, + r#"[ 100, -2,]"#, + r#"[1, 2, 3]"#, + r#"[1.1, 2.1, 3.1]"#, + r#"["a", "b", "c"]"#, + r#"[ [ 1, 2 ], [3, 4, 5] ]"#, + r#"[ [ 1, 2 ], ["a", "b", "c"] ]"#, + r#"[ { x = 1, a = "2" }, {a = "a",b = "b", c = "c"} ]"#, + ]; + for input in inputs { + parsed_value_eq!(input); + } + + let invalid_inputs = [r#"["#, r#"[,]"#, r#"[,2]"#, r#"[1e165,,]"#]; + for input in invalid_inputs { + let parsed = array().easy_parse(Stream::new(input.as_bytes())); + assert!(parsed.is_err()); + } + } +} diff --git a/crates/toml_edit/src/parser/datetime.rs b/crates/toml_edit/src/parser/datetime.rs index b3ee8e9e..b3360c04 100644 --- a/crates/toml_edit/src/parser/datetime.rs +++ b/crates/toml_edit/src/parser/datetime.rs @@ -208,3 +208,48 @@ parse!(unsigned_digits(count: usize) -> u32, { s.parse::() }) }); + +#[cfg(test)] +mod test { + #[test] + fn offset_date_time() { + let inputs = [ + "1979-05-27T07:32:00Z", + "1979-05-27T00:32:00-07:00", + "1979-05-27T00:32:00.999999-07:00", + ]; + for input in inputs { + parsed_date_time_eq!(input, is_datetime); + } + } + + #[test] + fn local_date_time() { + let inputs = ["1979-05-27T07:32:00", "1979-05-27T00:32:00.999999"]; + for input in inputs { + parsed_date_time_eq!(input, is_datetime); + } + } + + #[test] + fn local_date() { + let inputs = ["1979-05-27", "2017-07-20"]; + for input in inputs { + parsed_date_time_eq!(input, is_datetime); + } + } + + #[test] + fn local_time() { + let inputs = ["07:32:00", "00:32:00.999999"]; + for input in inputs { + parsed_date_time_eq!(input, is_datetime); + } + } + + #[test] + fn time_fraction_truncated() { + let input = "1987-07-05T17:45:00.123456789012345Z"; + parsed_date_time_eq!(input, is_datetime); + } +} diff --git a/crates/toml_edit/src/parser/document.rs b/crates/toml_edit/src/parser/document.rs index 8ea08558..16032ffe 100644 --- a/crates/toml_edit/src/parser/document.rs +++ b/crates/toml_edit/src/parser/document.rs @@ -1,44 +1,89 @@ +use std::cell::RefCell; + +use combine::parser::byte::byte; +use combine::stream::position::{IndexPositioner, Positioner, Stream}; +use combine::stream::RangeStream; +use combine::Parser; +use combine::*; + use crate::document::Document; use crate::key::Key; -use crate::parser::errors::CustomError; use crate::parser::inline_table::KEYVAL_SEP; use crate::parser::key::key; use crate::parser::table::table; use crate::parser::trivia::{comment, line_ending, line_trailing, newline, ws}; use crate::parser::value::value; -use crate::parser::{TomlError, TomlParser}; +use crate::parser::{ParseState, TomlError}; use crate::table::TableKeyValue; -use crate::{InternalString, Item}; -use combine::parser::byte::byte; -use combine::stream::position::{IndexPositioner, Positioner, Stream}; -use combine::stream::RangeStream; -use combine::Parser; -use combine::*; -use indexmap::map::Entry; -use std::cell::RefCell; -use std::mem; -use std::ops::DerefMut; +use crate::Item; + +// ;; TOML + +// toml = expression *( newline expression ) + +// expression = ( ( ws comment ) / +// ( ws keyval ws [ comment ] ) / +// ( ws table ws [ comment ] ) / +// ws ) +pub(crate) fn document(s: &[u8]) -> Result { + // Remove BOM if present + let s = s.strip_prefix(b"\xEF\xBB\xBF").unwrap_or(s); + + let parser = RefCell::new(ParseState::default()); + let input = Stream::new(s); + + let parsed = parse_ws(&parser) + .with(choice(( + eof(), + skip_many1( + look_ahead(any()) + .then(|e| { + dispatch!(e; + crate::parser::trivia::COMMENT_START_SYMBOL => parse_comment(&parser), + crate::parser::table::STD_TABLE_OPEN => table(&parser), + crate::parser::trivia::LF | + crate::parser::trivia::CR => parse_newline(&parser), + _ => keyval(&parser), + ) + }) + .skip(parse_ws(&parser)), + ), + ))) + .easy_parse(input); + match parsed { + Ok((_, ref rest)) if !rest.input.is_empty() => Err(TomlError::from_unparsed( + (&rest.positioner + as &dyn Positioner) + .position(), + s, + )), + Ok(..) => { + let doc = parser + .into_inner() + .into_document() + .map_err(|e| TomlError::custom(e.to_string()))?; + Ok(doc) + } + Err(e) => Err(TomlError::new(e, s)), + } +} toml_parser!(parse_comment, parser, { (comment(), line_ending()).and_then::<_, _, std::str::Utf8Error>(|(c, e)| { let c = std::str::from_utf8(c)?; - parser.borrow_mut().deref_mut().on_comment(c, e); + parser.borrow_mut().on_comment(c, e); Ok(()) }) }); -toml_parser!( - parse_ws, - parser, - ws().map(|w| parser.borrow_mut().deref_mut().on_ws(w)) -); +toml_parser!(parse_ws, parser, ws().map(|w| parser.borrow_mut().on_ws(w))); toml_parser!(parse_newline, parser, { - newline().map(|_| parser.borrow_mut().deref_mut().on_ws("\n")) + newline().map(|_| parser.borrow_mut().on_ws("\n")) }); toml_parser!(keyval, parser, { - parse_keyval().and_then(|(p, kv)| parser.borrow_mut().deref_mut().on_keyval(p, kv)) + parse_keyval().and_then(|(p, kv)| parser.borrow_mut().on_keyval(p, kv)) }); // keyval = key keyval-sep val @@ -77,106 +122,113 @@ parser! { } } -impl TomlParser { - // ;; TOML - - // toml = expression *( newline expression ) - - // expression = ( ( ws comment ) / - // ( ws keyval ws [ comment ] ) / - // ( ws table ws [ comment ] ) / - // ws ) - pub(crate) fn parse(s: &[u8]) -> Result { - // Remove BOM if present - let s = s.strip_prefix(b"\xEF\xBB\xBF").unwrap_or(s); - - let mut parser = RefCell::new(Self::default()); - let input = Stream::new(s); - - let parsed = parse_ws(&parser) - .with(choice(( - eof(), - skip_many1( - look_ahead(any()).then(|e| { - dispatch!(e; - crate::parser::trivia::COMMENT_START_SYMBOL => parse_comment(&parser), - crate::parser::table::STD_TABLE_OPEN => table(&parser), - crate::parser::trivia::LF | - crate::parser::trivia::CR => parse_newline(&parser), - _ => keyval(&parser), - ) - }) - .skip(parse_ws(&parser)), - ), - ))) - .easy_parse(input); - match parsed { - Ok((_, ref rest)) if !rest.input.is_empty() => Err(TomlError::from_unparsed( - (&rest.positioner - as &dyn Positioner) - .position(), - s, - )), - Ok(..) => { - parser - .get_mut() - .finalize_table() - .map_err(|e| TomlError::custom(e.to_string()))?; - let trailing = parser.borrow().trailing.as_str().into(); - parser.get_mut().document.trailing = trailing; - Ok(parser.into_inner().document) - } - Err(e) => Err(TomlError::new(e, s)), - } - } - - fn on_ws(&mut self, w: &str) { - self.trailing.push_str(w); - } - - fn on_comment(&mut self, c: &str, e: &str) { - self.trailing = [&self.trailing, c, e].concat(); - } - - fn on_keyval(&mut self, mut path: Vec, mut kv: TableKeyValue) -> Result<(), CustomError> { - { - let prefix = mem::take(&mut self.trailing); - let first_key = if path.is_empty() { - &mut kv.key - } else { - &mut path[0] +#[cfg(test)] +mod test { + use super::*; + + use snapbox::assert_eq; + + #[test] + fn documents() { + let documents = [ + r#" +# This is a TOML document. + +title = "TOML Example" + + [owner] + name = "Tom Preston-Werner" + dob = 1979-05-27T07:32:00-08:00 # First class dates + + [database] + server = "192.168.1.1" + ports = [ 8001, 8001, 8002 ] + connection_max = 5000 + enabled = true + + [servers] + + # Indentation (tabs and/or spaces) is allowed but not required +[servers.alpha] + ip = "10.0.0.1" + dc = "eqdc10" + + [servers.beta] + ip = "10.0.0.2" + dc = "eqdc10" + + [clients] + data = [ ["gamma", "delta"], [1, 2] ] + + # Line breaks are OK when inside arrays +hosts = [ + "alpha", + "omega" +] + + 'some.wierd .stuff' = """ + like + that + # """ # this broke my sintax highlighting + " also. like " = ''' +that +''' + double = 2e39 # this number looks familiar +# trailing comment"#, + r#""#, + r#" "#, + r#" hello = 'darkness' # my old friend +"#, + r#"[parent . child] +key = "value" +"#, + r#"hello.world = "a" +"#, + r#"foo = 1979-05-27 # Comment +"#, + ]; + for input in documents { + let doc = document(input.as_bytes()); + let doc = match doc { + Ok(doc) => doc, + Err(err) => { + panic!( + "Parse error: {}\nFailed to parse:\n```\n{}\n```", + err, input + ) + } }; - first_key - .decor - .set_prefix(prefix + first_key.decor.prefix().unwrap_or_default()); - } - - let table = &mut self.current_table; - let table = Self::descend_path(table, &path, true)?; - // "Likewise, using dotted keys to redefine tables already defined in [table] form is not allowed" - let mixed_table_types = table.is_dotted() == path.is_empty(); - if mixed_table_types { - return Err(CustomError::DuplicateKey { - key: kv.key.get().into(), - table: None, - }); + dbg!(doc.to_string()); + dbg!(input); + assert_eq(input, doc.to_string()); } - let key: InternalString = kv.key.get_internal().into(); - match table.items.entry(key) { - Entry::Vacant(o) => { - o.insert(kv); - } - Entry::Occupied(o) => { - // "Since tables cannot be defined more than once, redefining such tables using a [table] header is not allowed" - return Err(CustomError::DuplicateKey { - key: o.key().as_str().into(), - table: Some(self.current_table_path.clone()), - }); + let parse_only = ["\u{FEFF} +[package] +name = \"foo\" +version = \"0.0.1\" +authors = [] +"]; + for input in parse_only { + let doc = document(input.as_bytes()); + match doc { + Ok(_) => (), + Err(err) => { + panic!( + "Parse error: {}\nFailed to parse:\n```\n{}\n```", + err, input + ) + } } } - Ok(()) + let invalid_inputs = [r#" hello = 'darkness' # my old friend +$"#]; + for input in invalid_inputs { + let doc = document(input.as_bytes()); + + assert!(doc.is_err()); + } } } diff --git a/crates/toml_edit/src/parser/errors.rs b/crates/toml_edit/src/parser/errors.rs index ecc89ca2..a2ff053d 100644 --- a/crates/toml_edit/src/parser/errors.rs +++ b/crates/toml_edit/src/parser/errors.rs @@ -1,10 +1,12 @@ -use crate::Key; +use std::error::Error as StdError; +use std::fmt::{Display, Formatter, Result}; + use combine::easy::Errors as ParseError; use combine::stream::easy::Error; use combine::stream::position::SourcePosition; use itertools::Itertools; -use std::error::Error as StdError; -use std::fmt::{Display, Formatter, Result}; + +use crate::Key; /// Type representing a TOML parse error #[derive(Debug, Clone, Eq, PartialEq, Hash)] @@ -266,6 +268,24 @@ pub(crate) enum CustomError { OutOfRange, } +impl CustomError { + pub(crate) fn duplicate_key(path: &[Key], i: usize) -> Self { + assert!(i < path.len()); + Self::DuplicateKey { + key: path[i].to_repr().as_ref().as_raw().into(), + table: Some(path[..i].to_vec()), + } + } + + pub(crate) fn extend_wrong_type(path: &[Key], i: usize, actual: &'static str) -> Self { + assert!(i < path.len()); + Self::DottedKeyExtendWrongType { + key: path[..=i].to_vec(), + actual, + } + } +} + impl StdError for CustomError { fn description(&self) -> &'static str { "TOML parse error" diff --git a/crates/toml_edit/src/parser/inline_table.rs b/crates/toml_edit/src/parser/inline_table.rs index 0392b6bc..442e9832 100644 --- a/crates/toml_edit/src/parser/inline_table.rs +++ b/crates/toml_edit/src/parser/inline_table.rs @@ -1,15 +1,15 @@ +use combine::parser::byte::byte; +use combine::stream::RangeStream; +use combine::*; +use indexmap::map::Entry; + use crate::key::Key; use crate::parser::errors::CustomError; use crate::parser::key::key; -use crate::parser::table::extend_wrong_type; use crate::parser::trivia::ws; use crate::parser::value::value; use crate::table::TableKeyValue; use crate::{InlineTable, InternalString, Item, Value}; -use combine::parser::byte::byte; -use combine::stream::RangeStream; -use combine::*; -use indexmap::map::Entry; // ;; Inline Table @@ -62,7 +62,7 @@ fn descend_path<'a>( table = sweet_child_of_mine; } ref v => { - return Err(extend_wrong_type(path, i, v.type_name())); + return Err(CustomError::extend_wrong_type(path, i, v.type_name())); } } } @@ -110,3 +110,29 @@ parse!(keyval() -> (Vec, TableKeyValue), { ) }) }); + +#[cfg(test)] +mod test { + use super::*; + + use combine::stream::position::Stream; + + #[test] + fn inline_tables() { + let inputs = [ + r#"{}"#, + r#"{ }"#, + r#"{a = 1e165}"#, + r#"{ hello = "world", a = 1}"#, + r#"{ hello.world = "a" }"#, + ]; + for input in inputs { + parsed_value_eq!(input); + } + let invalid_inputs = [r#"{a = 1e165"#, r#"{ hello = "world", a = 2, hello = 1}"#]; + for input in invalid_inputs { + let parsed = inline_table().easy_parse(Stream::new(input.as_bytes())); + assert!(parsed.is_err()); + } + } +} diff --git a/crates/toml_edit/src/parser/key.rs b/crates/toml_edit/src/parser/key.rs index c6ad7327..ceb1873f 100644 --- a/crates/toml_edit/src/parser/key.rs +++ b/crates/toml_edit/src/parser/key.rs @@ -1,12 +1,13 @@ +use combine::parser::byte::byte; +use combine::parser::range::{recognize_with_value, take_while1}; +use combine::stream::RangeStream; +use combine::*; + use crate::key::Key; use crate::parser::strings::{basic_string, literal_string}; use crate::parser::trivia::{from_utf8_unchecked, ws}; use crate::repr::{Decor, Repr}; use crate::InternalString; -use combine::parser::byte::byte; -use combine::parser::range::{recognize_with_value, take_while1}; -use combine::stream::RangeStream; -use combine::*; // key = simple-key / dotted-key // dotted-key = simple-key 1*( dot-sep simple-key ) @@ -54,3 +55,28 @@ pub(crate) fn is_unquoted_char(c: u8) -> bool { // dot-sep = ws %x2E ws ; . Period const DOT_SEP: u8 = b'.'; + +#[cfg(test)] +mod test { + use super::*; + + use combine::stream::position::Stream; + use snapbox::assert_eq; + + #[test] + fn keys() { + let cases = [ + ("a", "a"), + (r#""hello\n ""#, "hello\n "), + (r#"'hello\n '"#, "hello\\n "), + ]; + + for (input, expected) in cases { + let parsed = simple_key().easy_parse(Stream::new(input.as_bytes())); + assert!(parsed.is_ok()); + let ((.., k), rest) = parsed.unwrap(); + assert_eq(k.as_str(), expected); + assert_eq!(rest.input.len(), 0); + } + } +} diff --git a/crates/toml_edit/src/parser/macros.rs b/crates/toml_edit/src/parser/macros.rs index 83d295e7..8f27f577 100644 --- a/crates/toml_edit/src/parser/macros.rs +++ b/crates/toml_edit/src/parser/macros.rs @@ -27,7 +27,7 @@ macro_rules! parse ( macro_rules! toml_parser ( ($name:ident, $argh:ident, $closure:expr) => ( parser!{ - fn $name['a, 'b, I]($argh: &'b RefCell)(I) -> () + fn $name['a, 'b, I]($argh: &'b RefCell)(I) -> () where [I: RangeStream< Range = &'a [u8], @@ -45,3 +45,68 @@ macro_rules! toml_parser ( } ); ); + +#[cfg(test)] +macro_rules! parsed_eq { + ($parsed:ident, $expected:expr) => {{ + assert!($parsed.is_ok(), "{:?}", $parsed.err().unwrap()); + let (v, rest) = $parsed.unwrap(); + assert_eq!(v, $expected); + assert!(rest.input.is_empty()); + }}; +} + +#[cfg(test)] +macro_rules! parsed_float_eq { + ($input:ident, $expected:expr) => {{ + let parsed = crate::parser::numbers::float().easy_parse(Stream::new($input.as_bytes())); + let (v, rest) = match parsed { + Ok(parsed) => parsed, + Err(err) => { + panic!("Unexpected error for {:?}: {:?}", $input, err); + } + }; + if $expected.is_nan() { + assert!(v.is_nan()); + } else if $expected.is_infinite() { + assert!(v.is_infinite()); + assert_eq!($expected.is_sign_positive(), v.is_sign_positive()); + } else { + dbg!($expected); + dbg!(v); + assert!(($expected - v).abs() < std::f64::EPSILON); + } + assert!(rest.input.is_empty()); + }}; +} + +#[cfg(test)] +macro_rules! parsed_value_eq { + ($input:expr) => { + use combine::EasyParser; + let parsed = crate::parser::value::value() + .easy_parse(combine::stream::position::Stream::new($input.as_bytes())); + let (v, rest) = match parsed { + Ok(parsed) => parsed, + Err(err) => { + panic!("Unexpected error for {:?}: {:?}", $input, err); + } + }; + snapbox::assert_eq(v.to_string(), $input); + assert!(rest.input.is_empty()); + }; +} + +#[cfg(test)] +macro_rules! parsed_date_time_eq { + ($input:expr, $is:ident) => {{ + use combine::EasyParser; + let parsed = crate::parser::value::value() + .easy_parse(combine::stream::position::Stream::new($input.as_bytes())); + assert!(parsed.is_ok()); + let (v, rest) = parsed.unwrap(); + snapbox::assert_eq(v.to_string(), $input); + assert!(rest.input.is_empty()); + assert!(v.$is()); + }}; +} diff --git a/crates/toml_edit/src/parser/mod.rs b/crates/toml_edit/src/parser/mod.rs index dcdb1785..03c9ed34 100644 --- a/crates/toml_edit/src/parser/mod.rs +++ b/crates/toml_edit/src/parser/mod.rs @@ -10,662 +10,17 @@ mod errors; mod inline_table; mod key; pub(crate) mod numbers; +mod state; pub(crate) mod strings; mod table; mod trivia; mod value; +pub(crate) use self::document::document; pub use self::errors::TomlError; pub(crate) use self::key::is_unquoted_char; pub(crate) use self::key::key as key_path; pub(crate) use self::key::simple_key; pub(crate) use self::value::value as value_parser; -use self::table::duplicate_key; -use crate::key::Key; -use crate::parser::errors::CustomError; -use crate::repr::Decor; -use crate::{ArrayOfTables, Document, Entry, Item, Table}; - -pub(crate) struct TomlParser { - document: Document, - trailing: String, - current_table_position: usize, - current_table: Table, - current_is_array: bool, - current_table_path: Vec, -} - -impl TomlParser { - pub(crate) fn start_aray_table( - &mut self, - path: Vec, - decor: Decor, - ) -> Result<(), CustomError> { - debug_assert!(!path.is_empty()); - debug_assert!(self.current_table.is_empty()); - debug_assert!(self.current_table_path.is_empty()); - - // Look up the table on start to ensure the duplicate_key error points to the right line - let root = self.document.as_table_mut(); - let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; - let key = &path[path.len() - 1]; - let entry = parent_table - .entry_format(key) - .or_insert(Item::ArrayOfTables(ArrayOfTables::new())); - entry - .as_array_of_tables() - .ok_or_else(|| duplicate_key(&path, path.len() - 1))?; - - self.current_table_position += 1; - self.current_table.decor = decor; - self.current_table.set_position(self.current_table_position); - self.current_is_array = true; - self.current_table_path = path; - - Ok(()) - } - - pub(crate) fn start_table(&mut self, path: Vec, decor: Decor) -> Result<(), CustomError> { - debug_assert!(!path.is_empty()); - debug_assert!(self.current_table.is_empty()); - debug_assert!(self.current_table_path.is_empty()); - - // 1. Look up the table on start to ensure the duplicate_key error points to the right line - // 2. Ensure any child tables from an implicit table are preserved - let root = self.document.as_table_mut(); - let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; - let key = &path[path.len() - 1]; - if let Some(entry) = parent_table.remove(key.get()) { - match entry { - Item::Table(t) if t.implicit => { - self.current_table = t; - } - _ => return Err(duplicate_key(&path, path.len() - 1)), - } - } - - self.current_table_position += 1; - self.current_table.decor = decor; - self.current_table.set_position(self.current_table_position); - self.current_is_array = false; - self.current_table_path = path; - - Ok(()) - } - - pub(crate) fn finalize_table(&mut self) -> Result<(), CustomError> { - let mut table = std::mem::take(&mut self.current_table); - let path = std::mem::take(&mut self.current_table_path); - - let root = self.document.as_table_mut(); - if path.is_empty() { - assert!(root.is_empty()); - std::mem::swap(&mut table, root); - } else if self.current_is_array { - let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; - let key = &path[path.len() - 1]; - - let entry = parent_table - .entry_format(key) - .or_insert(Item::ArrayOfTables(ArrayOfTables::new())); - let array = entry - .as_array_of_tables_mut() - .ok_or_else(|| duplicate_key(&path, path.len() - 1))?; - array.push(table); - } else { - let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; - let key = &path[path.len() - 1]; - - let entry = parent_table.entry_format(key); - match entry { - Entry::Occupied(entry) => { - match entry.into_mut() { - // if [a.b.c] header preceded [a.b] - Item::Table(ref mut t) if t.implicit => { - std::mem::swap(t, &mut table); - } - _ => return Err(duplicate_key(&path, path.len() - 1)), - } - } - Entry::Vacant(entry) => { - let item = Item::Table(table); - entry.insert(item); - } - } - } - - Ok(()) - } -} - -impl Default for TomlParser { - fn default() -> Self { - Self { - document: Document::new(), - trailing: String::new(), - current_table_position: 0, - current_table: Table::new(), - current_is_array: false, - current_table_path: Vec::new(), - } - } -} - -#[cfg(test)] -mod tests { - use crate::parser::*; - use combine::stream::position::Stream; - use combine::*; - use snapbox::assert_eq; - use std::fmt; - // Copied from https://github.com/colin-kiegel/rust-pretty-assertions/issues/24 - /// Wrapper around string slice that makes debug output `{:?}` to print string same way as `{}`. - /// Used in different `assert*!` macros in combination with `pretty_assertions` crate to make - /// test failures to show nice diffs. - #[derive(PartialEq, Eq)] - struct PrettyString<'a>(pub(crate) &'a str); - /// Make diff to display string as multi-line string - impl<'a> fmt::Debug for PrettyString<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(self.0) - } - } - - macro_rules! parsed_eq { - ($parsed:ident, $expected:expr) => {{ - assert!($parsed.is_ok(), "{:?}", $parsed.err().unwrap()); - let (v, rest) = $parsed.unwrap(); - assert_eq!(v, $expected); - assert!(rest.input.is_empty()); - }}; - } - - macro_rules! parsed_float_eq { - ($input:ident, $expected:expr) => {{ - let parsed = numbers::float().easy_parse(Stream::new($input.as_bytes())); - let (v, rest) = match parsed { - Ok(parsed) => parsed, - Err(err) => { - panic!("Unexpected error for {:?}: {:?}", $input, err); - } - }; - if $expected.is_nan() { - assert!(v.is_nan()); - } else if $expected.is_infinite() { - assert!(v.is_infinite()); - assert_eq!($expected.is_sign_positive(), v.is_sign_positive()); - } else { - dbg!($expected); - dbg!(v); - assert!(($expected - v).abs() < std::f64::EPSILON); - } - assert!(rest.input.is_empty()); - }}; - } - - macro_rules! parsed_value_eq { - ($input:expr) => { - let parsed = value::value().easy_parse(Stream::new($input.as_bytes())); - let (v, rest) = match parsed { - Ok(parsed) => parsed, - Err(err) => { - panic!("Unexpected error for {:?}: {:?}", $input, err); - } - }; - assert_eq(v.to_string(), $input); - assert!(rest.input.is_empty()); - }; - } - - macro_rules! parsed_date_time_eq { - ($input:expr, $is:ident) => {{ - let parsed = value::value().easy_parse(Stream::new($input.as_bytes())); - assert!(parsed.is_ok()); - let (v, rest) = parsed.unwrap(); - assert_eq(v.to_string(), $input); - assert!(rest.input.is_empty()); - assert!(v.$is()); - }}; - } - - #[test] - fn integers() { - let cases = [ - ("+99", 99), - ("42", 42), - ("0", 0), - ("-17", -17), - ("1_000", 1_000), - ("5_349_221", 5_349_221), - ("1_2_3_4_5", 1_2_3_4_5), - ("0xF", 15), - ("0o0_755", 493), - ("0b1_0_1", 5), - (&std::i64::MIN.to_string()[..], std::i64::MIN), - (&std::i64::MAX.to_string()[..], std::i64::MAX), - ]; - for &(input, expected) in &cases { - let parsed = numbers::integer().easy_parse(Stream::new(input.as_bytes())); - parsed_eq!(parsed, expected); - } - - let overflow = "1000000000000000000000000000000000"; - let parsed = numbers::integer().easy_parse(Stream::new(overflow.as_bytes())); - assert!(parsed.is_err()); - } - - #[test] - fn floats() { - let cases = [ - ("+1.0", 1.0), - ("3.1419", 3.1419), - ("-0.01", -0.01), - ("5e+22", 5e+22), - ("1e6", 1e6), - ("-2E-2", -2E-2), - ("6.626e-34", 6.626e-34), - ("9_224_617.445_991_228_313", 9_224_617.445_991_227), - ("-1.7976931348623157e+308", std::f64::MIN), - ("1.7976931348623157e+308", std::f64::MAX), - ("nan", f64::NAN), - ("+nan", f64::NAN), - ("-nan", f64::NAN), - ("inf", f64::INFINITY), - ("+inf", f64::INFINITY), - ("-inf", f64::NEG_INFINITY), - // ("1e+400", std::f64::INFINITY), - ]; - for &(input, expected) in &cases { - parsed_float_eq!(input, expected); - } - } - - #[test] - fn basic_string() { - let input = - r#""I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF. \U0002070E""#; - let parsed = strings::string().easy_parse(Stream::new(input.as_bytes())); - parsed_eq!( - parsed, - "I\'m a string. \"You can quote me\". Name\tJosé\nLocation\tSF. \u{2070E}" - ); - } - - #[test] - fn ml_basic_string() { - let cases = [ - ( - r#"""" -Roses are red -Violets are blue""""#, - r#"Roses are red -Violets are blue"#, - ), - (r#"""" \""" """"#, " \"\"\" "), - (r#"""" \\""""#, " \\"), - ]; - - for &(input, expected) in &cases { - dbg!(input); - let parsed = strings::string().easy_parse(Stream::new(input.as_bytes())); - parsed_eq!(parsed, expected); - } - - let invalid_cases = [r#"""" """#, r#"""" \""""#]; - - for input in &invalid_cases { - let parsed = strings::ml_basic_string().easy_parse(Stream::new(input.as_bytes())); - assert!(parsed.is_err()); - } - } - - #[test] - fn ml_basic_string_escape_ws() { - let inputs = [ - r#"""" -The quick brown \ - - - fox jumps over \ - the lazy dog.""""#, - r#""""\ - The quick brown \ - fox jumps over \ - the lazy dog.\ - """"#, - ]; - for input in &inputs { - dbg!(input); - let parsed = strings::string().easy_parse(Stream::new(input.as_bytes())); - parsed_eq!(parsed, "The quick brown fox jumps over the lazy dog."); - } - let empties = [ - r#""""\ - """"#, - r#"""" -\ - \ -""""#, - ]; - for empty in &empties { - let parsed = strings::string().easy_parse(Stream::new(empty.as_bytes())); - parsed_eq!(parsed, ""); - } - } - - #[test] - fn literal_string() { - let inputs = [ - r#"'C:\Users\nodejs\templates'"#, - r#"'\\ServerX\admin$\system32\'"#, - r#"'Tom "Dubs" Preston-Werner'"#, - r#"'<\i\c*\s*>'"#, - ]; - - for input in &inputs { - let parsed = strings::string().easy_parse(Stream::new(input.as_bytes())); - parsed_eq!(parsed, &input[1..input.len() - 1]); - } - } - - #[test] - fn ml_literal_string() { - let input = r#"'''I [dw]on't need \d{2} apples'''"#; - let parsed = strings::string().easy_parse(Stream::new(input.as_bytes())); - parsed_eq!(parsed, &input[3..input.len() - 3]); - let input = r#"''' -The first newline is -trimmed in raw strings. - All other whitespace - is preserved. -'''"#; - let parsed = strings::string().easy_parse(Stream::new(input.as_bytes())); - parsed_eq!(parsed, &input[4..input.len() - 3]); - } - - #[test] - fn offset_date_time() { - let inputs = [ - "1979-05-27T07:32:00Z", - "1979-05-27T00:32:00-07:00", - "1979-05-27T00:32:00.999999-07:00", - ]; - for input in inputs { - parsed_date_time_eq!(input, is_datetime); - } - } - - #[test] - fn local_date_time() { - let inputs = ["1979-05-27T07:32:00", "1979-05-27T00:32:00.999999"]; - for input in inputs { - parsed_date_time_eq!(input, is_datetime); - } - } - - #[test] - fn local_date() { - let inputs = ["1979-05-27", "2017-07-20"]; - for input in inputs { - parsed_date_time_eq!(input, is_datetime); - } - } - - #[test] - fn local_time() { - let inputs = ["07:32:00", "00:32:00.999999"]; - for input in inputs { - parsed_date_time_eq!(input, is_datetime); - } - } - - #[test] - fn time_fraction_truncated() { - let input = "1987-07-05T17:45:00.123456789012345Z"; - parsed_date_time_eq!(input, is_datetime); - } - - #[test] - fn trivia() { - let inputs = [ - "", - r#" "#, - r#" -"#, - r#" -# comment - -# comment2 - - -"#, - r#" - "#, - r#"# comment -# comment2 - - - "#, - ]; - for input in inputs { - let parsed = trivia::ws_comment_newline().easy_parse(Stream::new(input.as_bytes())); - assert!(parsed.is_ok()); - let (t, rest) = parsed.unwrap(); - assert!(rest.input.is_empty()); - assert_eq(t, input.as_bytes()); - } - } - - #[test] - fn arrays() { - let inputs = [ - r#"[]"#, - r#"[ ]"#, - r#"[ - 1, 2, 3 -]"#, - r#"[ - 1, - 2, # this is ok -]"#, - r#"[# comment -# comment2 - - - ]"#, - r#"[# comment -# comment2 - 1 - -#sd -, -# comment3 - - ]"#, - r#"[1]"#, - r#"[1,]"#, - r#"[ "all", 'strings', """are the same""", '''type''']"#, - r#"[ 100, -2,]"#, - r#"[1, 2, 3]"#, - r#"[1.1, 2.1, 3.1]"#, - r#"["a", "b", "c"]"#, - r#"[ [ 1, 2 ], [3, 4, 5] ]"#, - r#"[ [ 1, 2 ], ["a", "b", "c"] ]"#, - r#"[ { x = 1, a = "2" }, {a = "a",b = "b", c = "c"} ]"#, - ]; - for input in inputs { - parsed_value_eq!(input); - } - - let invalid_inputs = [r#"["#, r#"[,]"#, r#"[,2]"#, r#"[1e165,,]"#]; - for input in invalid_inputs { - let parsed = array::array().easy_parse(Stream::new(input.as_bytes())); - assert!(parsed.is_err()); - } - } - - #[test] - fn inline_tables() { - let inputs = [ - r#"{}"#, - r#"{ }"#, - r#"{a = 1e165}"#, - r#"{ hello = "world", a = 1}"#, - r#"{ hello.world = "a" }"#, - ]; - for input in inputs { - parsed_value_eq!(input); - } - let invalid_inputs = [r#"{a = 1e165"#, r#"{ hello = "world", a = 2, hello = 1}"#]; - for input in invalid_inputs { - let parsed = inline_table::inline_table().easy_parse(Stream::new(input.as_bytes())); - assert!(parsed.is_err()); - } - } - - #[test] - fn keys() { - let cases = [ - ("a", "a"), - (r#""hello\n ""#, "hello\n "), - (r#"'hello\n '"#, "hello\\n "), - ]; - - for (input, expected) in cases { - let parsed = key::simple_key().easy_parse(Stream::new(input.as_bytes())); - assert!(parsed.is_ok()); - let ((.., k), rest) = parsed.unwrap(); - assert_eq(k.as_str(), expected); - assert_eq!(rest.input.len(), 0); - } - } - - #[test] - fn values() { - let inputs = [ - "1979-05-27T00:32:00.999999", - "-239", - "1e200", - "9_224_617.445_991_228_313", - r#"'''I [dw]on't need \d{2} apples'''"#, - r#"''' -The first newline is -trimmed in raw strings. - All other whitespace - is preserved. -'''"#, - r#""Jos\u00E9\n""#, - r#""\\\"\b/\f\n\r\t\u00E9\U000A0000""#, - r#"{ hello = "world", a = 1}"#, - r#"[ { x = 1, a = "2" }, {a = "a",b = "b", c = "c"} ]"#, - ]; - for input in inputs { - parsed_value_eq!(input); - } - } - - #[test] - fn documents() { - let documents = [ - r#" -# This is a TOML document. - -title = "TOML Example" - - [owner] - name = "Tom Preston-Werner" - dob = 1979-05-27T07:32:00-08:00 # First class dates - - [database] - server = "192.168.1.1" - ports = [ 8001, 8001, 8002 ] - connection_max = 5000 - enabled = true - - [servers] - - # Indentation (tabs and/or spaces) is allowed but not required -[servers.alpha] - ip = "10.0.0.1" - dc = "eqdc10" - - [servers.beta] - ip = "10.0.0.2" - dc = "eqdc10" - - [clients] - data = [ ["gamma", "delta"], [1, 2] ] - - # Line breaks are OK when inside arrays -hosts = [ - "alpha", - "omega" -] - - 'some.wierd .stuff' = """ - like - that - # """ # this broke my sintax highlighting - " also. like " = ''' -that -''' - double = 2e39 # this number looks familiar -# trailing comment"#, - r#""#, - r#" "#, - r#" hello = 'darkness' # my old friend -"#, - r#"[parent . child] -key = "value" -"#, - r#"hello.world = "a" -"#, - r#"foo = 1979-05-27 # Comment -"#, - ]; - for document in documents { - let doc = TomlParser::parse(document.as_bytes()); - let doc = match doc { - Ok(doc) => doc, - Err(err) => { - panic!( - "Parse error: {}\nFailed to parse:\n```\n{}\n```", - err, document - ) - } - }; - - dbg!(doc.to_string()); - dbg!(document); - assert_eq(document, doc.to_string()); - } - - let parse_only = ["\u{FEFF} -[package] -name = \"foo\" -version = \"0.0.1\" -authors = [] -"]; - for document in parse_only { - let doc = TomlParser::parse(document.as_bytes()); - match doc { - Ok(_) => (), - Err(err) => { - panic!( - "Parse error: {}\nFailed to parse:\n```\n{}\n```", - err, document - ) - } - } - } - - let invalid_inputs = [r#" hello = 'darkness' # my old friend -$"#]; - for document in invalid_inputs { - let doc = TomlParser::parse(document.as_bytes()); - - assert!(doc.is_err()); - } - } -} +use self::state::ParseState; diff --git a/crates/toml_edit/src/parser/numbers.rs b/crates/toml_edit/src/parser/numbers.rs index a99a82ff..58a2c4a3 100644 --- a/crates/toml_edit/src/parser/numbers.rs +++ b/crates/toml_edit/src/parser/numbers.rs @@ -1,9 +1,10 @@ -use crate::parser::trivia::from_utf8_unchecked; use combine::parser::byte::{byte, bytes, digit, hex_digit, oct_digit}; use combine::parser::range::{range, recognize}; use combine::stream::RangeStream; use combine::*; +use crate::parser::trivia::from_utf8_unchecked; + // ;; Boolean // boolean = true / false @@ -193,3 +194,63 @@ pub(crate) const NAN: &[u8] = b"nan"; parse!(nan() -> f64, { bytes(NAN).map(|_| f64::NAN) }); + +#[cfg(test)] +mod test { + use super::*; + + use crate::parser::*; + use combine::stream::position::Stream; + + #[test] + fn integers() { + let cases = [ + ("+99", 99), + ("42", 42), + ("0", 0), + ("-17", -17), + ("1_000", 1_000), + ("5_349_221", 5_349_221), + ("1_2_3_4_5", 1_2_3_4_5), + ("0xF", 15), + ("0o0_755", 493), + ("0b1_0_1", 5), + (&std::i64::MIN.to_string()[..], std::i64::MIN), + (&std::i64::MAX.to_string()[..], std::i64::MAX), + ]; + for &(input, expected) in &cases { + let parsed = numbers::integer().easy_parse(Stream::new(input.as_bytes())); + parsed_eq!(parsed, expected); + } + + let overflow = "1000000000000000000000000000000000"; + let parsed = numbers::integer().easy_parse(Stream::new(overflow.as_bytes())); + assert!(parsed.is_err()); + } + + #[test] + fn floats() { + let cases = [ + ("+1.0", 1.0), + ("3.1419", 3.1419), + ("-0.01", -0.01), + ("5e+22", 5e+22), + ("1e6", 1e6), + ("-2E-2", -2E-2), + ("6.626e-34", 6.626e-34), + ("9_224_617.445_991_228_313", 9_224_617.445_991_227), + ("-1.7976931348623157e+308", std::f64::MIN), + ("1.7976931348623157e+308", std::f64::MAX), + ("nan", f64::NAN), + ("+nan", f64::NAN), + ("-nan", f64::NAN), + ("inf", f64::INFINITY), + ("+inf", f64::INFINITY), + ("-inf", f64::NEG_INFINITY), + // ("1e+400", std::f64::INFINITY), + ]; + for &(input, expected) in &cases { + parsed_float_eq!(input, expected); + } + } +} diff --git a/crates/toml_edit/src/parser/state.rs b/crates/toml_edit/src/parser/state.rs new file mode 100644 index 00000000..def0e151 --- /dev/null +++ b/crates/toml_edit/src/parser/state.rs @@ -0,0 +1,253 @@ +use crate::key::Key; +use crate::parser::errors::CustomError; +use crate::repr::Decor; +use crate::table::TableKeyValue; +use crate::{ArrayOfTables, Document, InternalString, Item, Table}; + +pub(crate) struct ParseState { + document: Document, + trailing: String, + current_table_position: usize, + current_table: Table, + current_is_array: bool, + current_table_path: Vec, +} + +impl ParseState { + pub(crate) fn into_document(mut self) -> Result { + self.finalize_table()?; + let trailing = self.trailing.as_str().into(); + self.document.trailing = trailing; + Ok(self.document) + } + + pub(crate) fn on_ws(&mut self, w: &str) { + self.trailing.push_str(w); + } + + pub(crate) fn on_comment(&mut self, c: &str, e: &str) { + self.trailing = [&self.trailing, c, e].concat(); + } + + pub(crate) fn on_keyval( + &mut self, + mut path: Vec, + mut kv: TableKeyValue, + ) -> Result<(), CustomError> { + { + let prefix = std::mem::take(&mut self.trailing); + let first_key = if path.is_empty() { + &mut kv.key + } else { + &mut path[0] + }; + first_key + .decor + .set_prefix(prefix + first_key.decor.prefix().unwrap_or_default()); + } + + let table = &mut self.current_table; + let table = Self::descend_path(table, &path, true)?; + + // "Likewise, using dotted keys to redefine tables already defined in [table] form is not allowed" + let mixed_table_types = table.is_dotted() == path.is_empty(); + if mixed_table_types { + return Err(CustomError::DuplicateKey { + key: kv.key.get().into(), + table: None, + }); + } + + let key: InternalString = kv.key.get_internal().into(); + match table.items.entry(key) { + indexmap::map::Entry::Vacant(o) => { + o.insert(kv); + } + indexmap::map::Entry::Occupied(o) => { + // "Since tables cannot be defined more than once, redefining such tables using a [table] header is not allowed" + return Err(CustomError::DuplicateKey { + key: o.key().as_str().into(), + table: Some(self.current_table_path.clone()), + }); + } + } + + Ok(()) + } + + pub(crate) fn start_aray_table( + &mut self, + path: Vec, + decor: Decor, + ) -> Result<(), CustomError> { + debug_assert!(!path.is_empty()); + debug_assert!(self.current_table.is_empty()); + debug_assert!(self.current_table_path.is_empty()); + + // Look up the table on start to ensure the duplicate_key error points to the right line + let root = self.document.as_table_mut(); + let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; + let key = &path[path.len() - 1]; + let entry = parent_table + .entry_format(key) + .or_insert(Item::ArrayOfTables(ArrayOfTables::new())); + entry + .as_array_of_tables() + .ok_or_else(|| CustomError::duplicate_key(&path, path.len() - 1))?; + + self.current_table_position += 1; + self.current_table.decor = decor; + self.current_table.set_position(self.current_table_position); + self.current_is_array = true; + self.current_table_path = path; + + Ok(()) + } + + pub(crate) fn start_table(&mut self, path: Vec, decor: Decor) -> Result<(), CustomError> { + debug_assert!(!path.is_empty()); + debug_assert!(self.current_table.is_empty()); + debug_assert!(self.current_table_path.is_empty()); + + // 1. Look up the table on start to ensure the duplicate_key error points to the right line + // 2. Ensure any child tables from an implicit table are preserved + let root = self.document.as_table_mut(); + let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; + let key = &path[path.len() - 1]; + if let Some(entry) = parent_table.remove(key.get()) { + match entry { + Item::Table(t) if t.implicit => { + self.current_table = t; + } + _ => return Err(CustomError::duplicate_key(&path, path.len() - 1)), + } + } + + self.current_table_position += 1; + self.current_table.decor = decor; + self.current_table.set_position(self.current_table_position); + self.current_is_array = false; + self.current_table_path = path; + + Ok(()) + } + + pub(crate) fn finalize_table(&mut self) -> Result<(), CustomError> { + let mut table = std::mem::take(&mut self.current_table); + let path = std::mem::take(&mut self.current_table_path); + + let root = self.document.as_table_mut(); + if path.is_empty() { + assert!(root.is_empty()); + std::mem::swap(&mut table, root); + } else if self.current_is_array { + let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; + let key = &path[path.len() - 1]; + + let entry = parent_table + .entry_format(key) + .or_insert(Item::ArrayOfTables(ArrayOfTables::new())); + let array = entry + .as_array_of_tables_mut() + .ok_or_else(|| CustomError::duplicate_key(&path, path.len() - 1))?; + array.push(table); + } else { + let parent_table = Self::descend_path(root, &path[..path.len() - 1], false)?; + let key = &path[path.len() - 1]; + + let entry = parent_table.entry_format(key); + match entry { + crate::Entry::Occupied(entry) => { + match entry.into_mut() { + // if [a.b.c] header preceded [a.b] + Item::Table(ref mut t) if t.implicit => { + std::mem::swap(t, &mut table); + } + _ => return Err(CustomError::duplicate_key(&path, path.len() - 1)), + } + } + crate::Entry::Vacant(entry) => { + let item = Item::Table(table); + entry.insert(item); + } + } + } + + Ok(()) + } + + pub(crate) fn descend_path<'t, 'k>( + mut table: &'t mut Table, + path: &'k [Key], + dotted: bool, + ) -> Result<&'t mut Table, CustomError> { + for (i, key) in path.iter().enumerate() { + let entry = table.entry_format(key).or_insert_with(|| { + let mut new_table = Table::new(); + new_table.set_implicit(true); + new_table.set_dotted(dotted); + + Item::Table(new_table) + }); + match *entry { + Item::Value(ref v) => { + return Err(CustomError::extend_wrong_type(path, i, v.type_name())); + } + Item::ArrayOfTables(ref mut array) => { + debug_assert!(!array.is_empty()); + + let index = array.len() - 1; + let last_child = array.get_mut(index).unwrap(); + + table = last_child; + } + Item::Table(ref mut sweet_child_of_mine) => { + table = sweet_child_of_mine; + } + _ => unreachable!(), + } + } + Ok(table) + } + + pub(crate) fn on_std_header( + &mut self, + path: Vec, + trailing: &str, + ) -> Result<(), CustomError> { + debug_assert!(!path.is_empty()); + + self.finalize_table()?; + let leading = std::mem::take(&mut self.trailing); + self.start_table(path, Decor::new(leading, trailing))?; + + Ok(()) + } + + pub(crate) fn on_array_header( + &mut self, + path: Vec, + trailing: &str, + ) -> Result<(), CustomError> { + debug_assert!(!path.is_empty()); + + self.finalize_table()?; + let leading = std::mem::take(&mut self.trailing); + self.start_aray_table(path, Decor::new(leading, trailing))?; + + Ok(()) + } +} + +impl Default for ParseState { + fn default() -> Self { + Self { + document: Document::new(), + trailing: String::new(), + current_table_position: 0, + current_table: Table::new(), + current_is_array: false, + current_table_path: Vec::new(), + } + } +} diff --git a/crates/toml_edit/src/parser/strings.rs b/crates/toml_edit/src/parser/strings.rs index dc50fafb..3b078976 100644 --- a/crates/toml_edit/src/parser/strings.rs +++ b/crates/toml_edit/src/parser/strings.rs @@ -1,14 +1,16 @@ -use crate::parser::errors::CustomError; -use crate::parser::trivia::{ - from_utf8_unchecked, is_non_ascii, is_wschar, newline, ws, ws_newlines, -}; +use std::borrow::Cow; +use std::char; + use combine::error::Commit; use combine::parser::byte::{byte, bytes, hex_digit}; use combine::parser::range::{range, recognize, take_while, take_while1}; use combine::stream::RangeStream; use combine::*; -use std::borrow::Cow; -use std::char; + +use crate::parser::errors::CustomError; +use crate::parser::trivia::{ + from_utf8_unchecked, is_non_ascii, is_wschar, newline, ws, ws_newlines, +}; // ;; String @@ -292,3 +294,114 @@ parse!(mll_quotes() -> &'a str, { unsafe { from_utf8_unchecked(b, "`bytes` out npn-ASCII") } }) }); + +#[cfg(test)] +mod test { + use super::*; + + use crate::parser::*; + use combine::stream::position::Stream; + + #[test] + fn basic_string() { + let input = + r#""I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF. \U0002070E""#; + let parsed = strings::string().easy_parse(Stream::new(input.as_bytes())); + parsed_eq!( + parsed, + "I\'m a string. \"You can quote me\". Name\tJosé\nLocation\tSF. \u{2070E}" + ); + } + + #[test] + fn ml_basic_string() { + let cases = [ + ( + r#"""" +Roses are red +Violets are blue""""#, + r#"Roses are red +Violets are blue"#, + ), + (r#"""" \""" """"#, " \"\"\" "), + (r#"""" \\""""#, " \\"), + ]; + + for &(input, expected) in &cases { + dbg!(input); + let parsed = strings::string().easy_parse(Stream::new(input.as_bytes())); + parsed_eq!(parsed, expected); + } + + let invalid_cases = [r#"""" """#, r#"""" \""""#]; + + for input in &invalid_cases { + let parsed = strings::ml_basic_string().easy_parse(Stream::new(input.as_bytes())); + assert!(parsed.is_err()); + } + } + + #[test] + fn ml_basic_string_escape_ws() { + let inputs = [ + r#"""" +The quick brown \ + + + fox jumps over \ + the lazy dog.""""#, + r#""""\ + The quick brown \ + fox jumps over \ + the lazy dog.\ + """"#, + ]; + for input in &inputs { + dbg!(input); + let parsed = strings::string().easy_parse(Stream::new(input.as_bytes())); + parsed_eq!(parsed, "The quick brown fox jumps over the lazy dog."); + } + let empties = [ + r#""""\ + """"#, + r#"""" +\ + \ +""""#, + ]; + for empty in &empties { + let parsed = strings::string().easy_parse(Stream::new(empty.as_bytes())); + parsed_eq!(parsed, ""); + } + } + + #[test] + fn literal_string() { + let inputs = [ + r#"'C:\Users\nodejs\templates'"#, + r#"'\\ServerX\admin$\system32\'"#, + r#"'Tom "Dubs" Preston-Werner'"#, + r#"'<\i\c*\s*>'"#, + ]; + + for input in &inputs { + let parsed = strings::string().easy_parse(Stream::new(input.as_bytes())); + parsed_eq!(parsed, &input[1..input.len() - 1]); + } + } + + #[test] + fn ml_literal_string() { + let input = r#"'''I [dw]on't need \d{2} apples'''"#; + let parsed = strings::string().easy_parse(Stream::new(input.as_bytes())); + parsed_eq!(parsed, &input[3..input.len() - 3]); + let input = r#"''' +The first newline is +trimmed in raw strings. + All other whitespace + is preserved. +'''"#; + let parsed = strings::string().easy_parse(Stream::new(input.as_bytes())); + parsed_eq!(parsed, &input[4..input.len() - 3]); + } +} diff --git a/crates/toml_edit/src/parser/table.rs b/crates/toml_edit/src/parser/table.rs index 6c17df75..d4807453 100644 --- a/crates/toml_edit/src/parser/table.rs +++ b/crates/toml_edit/src/parser/table.rs @@ -1,19 +1,13 @@ -use crate::key::Key; -use crate::parser::errors::CustomError; -use crate::parser::key::key; -use crate::parser::trivia::line_trailing; -use crate::parser::TomlParser; -use crate::repr::Decor; -use crate::{Item, Table}; +use std::cell::RefCell; + use combine::parser::byte::byte; use combine::parser::range::range; use combine::stream::RangeStream; use combine::*; -use std::cell::RefCell; -use std::mem; -// https://github.com/rust-lang/rust/issues/41358 -#[allow(unused_imports)] -use std::ops::DerefMut; + +use crate::parser::key::key; +use crate::parser::trivia::line_trailing; +use crate::parser::ParseState; // std-table-open = %x5B ws ; [ Left square bracket pub(crate) const STD_TABLE_OPEN: u8 = b'['; @@ -32,7 +26,7 @@ toml_parser!(std_table, parser, { between(byte(STD_TABLE_OPEN), byte(STD_TABLE_CLOSE), key()), line_trailing().and_then(std::str::from_utf8), ) - .and_then(|(h, t)| parser.borrow_mut().deref_mut().on_std_header(h, t)) + .and_then(|(h, t)| parser.borrow_mut().on_std_header(h, t)) }); // ;; Array Table @@ -43,14 +37,14 @@ toml_parser!(array_table, parser, { between(range(ARRAY_TABLE_OPEN), range(ARRAY_TABLE_CLOSE), key()), line_trailing().and_then(std::str::from_utf8), ) - .and_then(|(h, t)| parser.borrow_mut().deref_mut().on_array_header(h, t)) + .and_then(|(h, t)| parser.borrow_mut().on_array_header(h, t)) }); // ;; Table // table = std-table / array-table parser! { - pub(crate) fn table['a, 'b, I](parser: &'b RefCell)(I) -> () + pub(crate) fn table['a, 'b, I](parser: &'b RefCell)(I) -> () where [I: RangeStream< Range = &'a [u8], @@ -67,75 +61,3 @@ parser! { .message("While parsing a Table Header") } } - -pub(crate) fn duplicate_key(path: &[Key], i: usize) -> CustomError { - assert!(i < path.len()); - CustomError::DuplicateKey { - key: path[i].to_repr().as_ref().as_raw().into(), - table: Some(path[..i].to_vec()), - } -} - -pub(crate) fn extend_wrong_type(path: &[Key], i: usize, actual: &'static str) -> CustomError { - assert!(i < path.len()); - CustomError::DottedKeyExtendWrongType { - key: path[..=i].to_vec(), - actual, - } -} - -impl TomlParser { - pub(crate) fn descend_path<'t, 'k>( - mut table: &'t mut Table, - path: &'k [Key], - dotted: bool, - ) -> Result<&'t mut Table, CustomError> { - for (i, key) in path.iter().enumerate() { - let entry = table.entry_format(key).or_insert_with(|| { - let mut new_table = Table::new(); - new_table.set_implicit(true); - new_table.set_dotted(dotted); - - Item::Table(new_table) - }); - match *entry { - Item::Value(ref v) => { - return Err(extend_wrong_type(path, i, v.type_name())); - } - Item::ArrayOfTables(ref mut array) => { - debug_assert!(!array.is_empty()); - - let index = array.len() - 1; - let last_child = array.get_mut(index).unwrap(); - - table = last_child; - } - Item::Table(ref mut sweet_child_of_mine) => { - table = sweet_child_of_mine; - } - _ => unreachable!(), - } - } - Ok(table) - } - - fn on_std_header(&mut self, path: Vec, trailing: &str) -> Result<(), CustomError> { - debug_assert!(!path.is_empty()); - - self.finalize_table()?; - let leading = mem::take(&mut self.trailing); - self.start_table(path, Decor::new(leading, trailing))?; - - Ok(()) - } - - fn on_array_header(&mut self, path: Vec, trailing: &str) -> Result<(), CustomError> { - debug_assert!(!path.is_empty()); - - self.finalize_table()?; - let leading = mem::take(&mut self.trailing); - self.start_aray_table(path, Decor::new(leading, trailing))?; - - Ok(()) - } -} diff --git a/crates/toml_edit/src/parser/trivia.rs b/crates/toml_edit/src/parser/trivia.rs index f50df7c1..93b001db 100644 --- a/crates/toml_edit/src/parser/trivia.rs +++ b/crates/toml_edit/src/parser/trivia.rs @@ -122,3 +122,43 @@ parse!(line_trailing() -> &'a [u8], { optional(comment()), )).skip(line_ending()) }); + +#[cfg(test)] +mod test { + use super::*; + + use crate::parser::*; + use combine::stream::position::Stream; + use snapbox::assert_eq; + + #[test] + fn trivia() { + let inputs = [ + "", + r#" "#, + r#" +"#, + r#" +# comment + +# comment2 + + +"#, + r#" + "#, + r#"# comment +# comment2 + + + "#, + ]; + for input in inputs { + let parsed = trivia::ws_comment_newline().easy_parse(Stream::new(input.as_bytes())); + assert!(parsed.is_ok()); + let (t, rest) = parsed.unwrap(); + assert!(rest.input.is_empty()); + assert_eq(t, input.as_bytes()); + } + } +} diff --git a/crates/toml_edit/src/parser/value.rs b/crates/toml_edit/src/parser/value.rs index 6fcbc75d..d55713dd 100644 --- a/crates/toml_edit/src/parser/value.rs +++ b/crates/toml_edit/src/parser/value.rs @@ -1,3 +1,7 @@ +use combine::parser::range::recognize_with_value; +use combine::stream::RangeStream; +use combine::*; + use crate::parser::array::array; use crate::parser::datetime::date_time; use crate::parser::inline_table::inline_table; @@ -7,9 +11,6 @@ use crate::parser::trivia::from_utf8_unchecked; use crate::repr::{Formatted, Repr}; use crate::value as v; use crate::Value; -use combine::parser::range::recognize_with_value; -use combine::stream::RangeStream; -use combine::*; // val = string / boolean / array / inline-table / date-time / float / integer parse!(value() -> v::Value, { @@ -84,3 +85,30 @@ fn apply_raw(mut val: Value, raw: &[u8]) -> Result { val.decorate("", ""); Ok(val) } + +#[cfg(test)] +mod test { + #[test] + fn values() { + let inputs = [ + "1979-05-27T00:32:00.999999", + "-239", + "1e200", + "9_224_617.445_991_228_313", + r#"'''I [dw]on't need \d{2} apples'''"#, + r#"''' +The first newline is +trimmed in raw strings. + All other whitespace + is preserved. +'''"#, + r#""Jos\u00E9\n""#, + r#""\\\"\b/\f\n\r\t\u00E9\U000A0000""#, + r#"{ hello = "world", a = 1}"#, + r#"[ { x = 1, a = "2" }, {a = "a",b = "b", c = "c"} ]"#, + ]; + for input in inputs { + parsed_value_eq!(input); + } + } +}