diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5897576f..062936e9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,8 +4,8 @@ Before diving into the whys and hows, it's best one got started with the what's. The best place to learn about what pest does and what its limits are is the [book]. Feel free to try any of the examples in the [fiddle editor] as well. -[book]: https://pest-parser.github.io/book -[fiddle editor]: https://pest-parser.github.io/#editor +[book]: https://pest.rs/book +[fiddle editor]: https://pest.rs/#editor With that out of the way, let's go through *pest's* crate structure: @@ -46,9 +46,14 @@ Our [website] and [book] are in constant need of attention. While not as well or [website]:https://github.com/pest-parser/site [book]: https://github.com/pest-parser/book -## Gitter +## Gitter, Discord and GitHub Discussions -Sometimes it's best to just say what you want. For that, there's our [Gitter] room. Leave feedback, help out, learn what people are up to, go off-topic for hours, or complain that compile times are terrible—seriously, please don't. +Sometimes it's best to just say what you want. For that, there's our [Gitter] room or [Discord] server. Leave feedback, help out, learn what people are up to, go off-topic for hours, or complain that compile times are terrible—seriously, please don't. + +For more long-living threads and common questions, you can use [GitHub Discussions]. [Gitter]: https://gitter.im/pest-parser/pest +[Discord]: https://discord.gg/XEGACtWpT2 + +[GitHub Discussions]: https://github.com/pest-parser/pest/discussions \ No newline at end of file diff --git a/README.md b/README.md index ddc870c9..da30ab74 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,8 @@ # pest. 
The Elegant Parser -[![Join the chat at https://gitter.im/dragostis/pest](https://badges.gitter.im/dragostis/pest.svg)](https://gitter.im/dragostis/pest?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -[![Book](https://img.shields.io/badge/book-WIP-4d76ae.svg)](https://pest-parser.github.io/book) +[![Join the chat at https://gitter.im/pest-parser/pest](https://badges.gitter.im/dragostis/pest.svg)](https://gitter.im/pest-parser/pest?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +[![Book](https://img.shields.io/badge/book-WIP-4d76ae.svg)](https://pest.rs/book) [![Docs](https://docs.rs/pest/badge.svg)](https://docs.rs/pest) [![pest Continuous Integration](https://github.com/pest-parser/pest/actions/workflows/ci.yml/badge.svg)](https://github.com/pest-parser/pest/actions/workflows/ci.yml) @@ -31,12 +31,15 @@ Other helpful resources: * API reference on [docs.rs] * play with grammars and share them on our [fiddle] -* leave feedback, ask questions, or greet us on [Gitter] +* find previous common questions answered or ask questions on [GitHub Discussions] +* leave feedback, ask questions, or greet us on [Gitter] or [Discord] -[book]: https://pest-parser.github.io/book +[book]: https://pest.rs/book [docs.rs]: https://docs.rs/pest -[fiddle]: https://pest-parser.github.io/#editor -[Gitter]: https://gitter.im/dragostis/pest +[fiddle]: https://pest.rs/#editor +[Gitter]: https://gitter.im/pest-parser/pest +[Discord]: https://discord.gg/XEGACtWpT2 +[GitHub Discussions]: https://github.com/pest-parser/pest/discussions ## Example @@ -81,6 +84,9 @@ thread 'main' panicked at ' --> 1:1 = expected ident', src/main.rs:12 ``` +These error messages can be obtained from their default `Display` implementation, +e.g. `panic!("{}", parser_result.unwrap_err())` or `println!("{}", e)`. + ## Pairs API The grammar can be used to derive a `Parser` implementation automatically. 
@@ -133,6 +139,25 @@ Letter: b Digit: 2 ``` +### Defining multiple parsers in a single file +The current automatic `Parser` derivation will produce the `Rule` enum +which would have name conflicts if one tried to define multiple such structs +that automatically derive `Parser`. One possible way around it is to put each +parser struct in a separate namespace: + +```rust +mod a { + #[derive(Parser)] + #[grammar = "a.pest"] + pub struct ParserA; +} +mod b { + #[derive(Parser)] + #[grammar = "b.pest"] + pub struct ParserB; +} +``` + ## Other features * Precedence climbing diff --git a/bootstrap/Cargo.toml b/bootstrap/Cargo.toml index 6b6d4188..c20138f4 100644 --- a/bootstrap/Cargo.toml +++ b/bootstrap/Cargo.toml @@ -4,7 +4,7 @@ description = "pest bootstrap script" version = "0.0.0" edition = "2021" authors = ["Dragoș Tiselice "] -homepage = "https://pest-parser.github.io/" +homepage = "https://pest.rs/" repository = "https://github.com/pest-parser/pest" documentation = "https://docs.rs/pest" publish = false diff --git a/derive/Cargo.toml b/derive/Cargo.toml index e0c437bd..18c448b0 100644 --- a/derive/Cargo.toml +++ b/derive/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "pest_derive" description = "pest's derive macro" -version = "2.4.0" +version = "2.4.1" edition = "2021" authors = ["Dragoș Tiselice "] -homepage = "https://pest-parser.github.io/" +homepage = "https://pest.rs/" repository = "https://github.com/pest-parser/pest" documentation = "https://docs.rs/pest" keywords = ["pest", "parser", "peg", "grammar"] @@ -23,5 +23,5 @@ std = ["pest/std", "pest_generator/std"] [dependencies] # for tests, included transitively anyway -pest = { path = "../pest", version = "2.4.0", default-features = false } -pest_generator = { path = "../generator", version = "2.4.0", default-features = false } +pest = { path = "../pest", version = "2.4.1", default-features = false } +pest_generator = { path = "../generator", version = "2.4.1", default-features = false } diff --git 
a/derive/src/lib.rs b/derive/src/lib.rs index d980c6ae..feccc3a7 100644 --- a/derive/src/lib.rs +++ b/derive/src/lib.rs @@ -6,7 +6,12 @@ // license , at your // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. - +#![doc( + html_root_url = "https://docs.rs/pest_derive", + html_logo_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg", + html_favicon_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg" +)] +#![warn(missing_docs, rust_2018_idioms, unused_qualifications)] //! # pest. The Elegant Parser //! //! pest is a general purpose parser written in Rust with a focus on accessibility, correctness, @@ -24,12 +29,16 @@ //! //! * API reference on [docs.rs] //! * play with grammars and share them on our [fiddle] -//! * leave feedback, ask questions, or greet us on [Gitter] +//! * find previous common questions answered or ask questions on [GitHub Discussions] +//! * leave feedback, ask questions, or greet us on [Gitter] or [Discord] //! -//! [book]: https://pest-parser.github.io/book +//! [book]: https://pest.rs/book //! [docs.rs]: https://docs.rs/pest -//! [fiddle]: https://pest-parser.github.io/#editor -//! [Gitter]: https://gitter.im/dragostis/pest +//! [fiddle]: https://pest.rs/#editor +//! [Gitter]: https://gitter.im/pest-parser/pest +//! [Discord]: https://discord.gg/XEGACtWpT2 +//! [GitHub Discussions]: https://github.com/pest-parser/pest/discussions +//! //! //! ## `.pest` files //! @@ -181,6 +190,10 @@ //! `e1` did. Repetitions and optionals (`e*`, `e+`, `e{, n}`, `e{n,}`, //! `e{m,n}`, `e?`) can modify the stack each time `e` matches. The `!e` and `&e` //! expressions are a special case; they never modify the stack. +//! Many languages have "keyword" tokens (e.g. if, for, while) as well as general +//! tokens (e.g. identifier) that matches any word. In order to match a keyword, +//! 
generally, you may need to restrict that is not immediately followed by another +//! letter or digit (otherwise it would be matched as an identifier). //! //! ## Special rules //! @@ -289,12 +302,10 @@ //! * `ASCII` - matches a character from \x00..\x7f //! * `NEWLINE` - matches either "\n" or "\r\n" or "\r" -#![doc(html_root_url = "https://docs.rs/pest_derive")] -extern crate pest_generator; -extern crate proc_macro; - use proc_macro::TokenStream; +/// The main method that's called by the proc macro +/// (a wrapper around `pest_generator::derive_parser`) #[proc_macro_derive(Parser, attributes(grammar, grammar_inline))] pub fn derive_parser(input: TokenStream) -> TokenStream { pest_generator::derive_parser(input.into(), true).into() diff --git a/generator/Cargo.toml b/generator/Cargo.toml index 471667dc..ddde3ed0 100644 --- a/generator/Cargo.toml +++ b/generator/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "pest_generator" description = "pest code generator" -version = "2.4.0" +version = "2.4.1" edition = "2021" authors = ["Dragoș Tiselice "] -homepage = "https://pest-parser.github.io/" +homepage = "https://pest.rs/" repository = "https://github.com/pest-parser/pest" documentation = "https://docs.rs/pest" keywords = ["pest", "generator"] @@ -18,8 +18,8 @@ default = ["std"] std = ["pest/std"] [dependencies] -pest = { path = "../pest", version = "2.4.0", default-features = false } -pest_meta = { path = "../meta", version = "2.4.0" } +pest = { path = "../pest", version = "2.4.1", default-features = false } +pest_meta = { path = "../meta", version = "2.4.1" } proc-macro2 = "1.0" quote = "1.0" syn = "1.0" diff --git a/generator/src/generator.rs b/generator/src/generator.rs index 0d3051e2..f3da1bac 100644 --- a/generator/src/generator.rs +++ b/generator/src/generator.rs @@ -159,7 +159,7 @@ fn generate_builtin_rules() -> Vec<(&'static str, TokenStream)> { builtins.push((property, quote! 
{ #[inline] #[allow(dead_code, non_snake_case, unused_variables)] - fn #property_ident(state: #box_ty<::pest::ParserState>) -> ::pest::ParseResult<#box_ty<::pest::ParserState>> { + fn #property_ident(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { state.match_char_by(::pest::unicode::#property_ident) } })); @@ -249,7 +249,7 @@ fn generate_rule(rule: OptimizedRule) -> TokenStream { RuleType::Normal => quote! { #[inline] #[allow(non_snake_case, unused_variables)] - pub fn #name(state: #box_ty<::pest::ParserState>) -> ::pest::ParseResult<#box_ty<::pest::ParserState>> { + pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { state.rule(Rule::#name, |state| { #expr }) @@ -258,14 +258,14 @@ fn generate_rule(rule: OptimizedRule) -> TokenStream { RuleType::Silent => quote! { #[inline] #[allow(non_snake_case, unused_variables)] - pub fn #name(state: #box_ty<::pest::ParserState>) -> ::pest::ParseResult<#box_ty<::pest::ParserState>> { + pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { #expr } }, RuleType::Atomic => quote! { #[inline] #[allow(non_snake_case, unused_variables)] - pub fn #name(state: #box_ty<::pest::ParserState>) -> ::pest::ParseResult<#box_ty<::pest::ParserState>> { + pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { state.rule(Rule::#name, |state| { state.atomic(::pest::Atomicity::Atomic, |state| { #expr @@ -276,7 +276,7 @@ fn generate_rule(rule: OptimizedRule) -> TokenStream { RuleType::CompoundAtomic => quote! 
{ #[inline] #[allow(non_snake_case, unused_variables)] - pub fn #name(state: #box_ty<::pest::ParserState>) -> ::pest::ParseResult<#box_ty<::pest::ParserState>> { + pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { state.atomic(::pest::Atomicity::CompoundAtomic, |state| { state.rule(Rule::#name, |state| { #expr @@ -287,7 +287,7 @@ fn generate_rule(rule: OptimizedRule) -> TokenStream { RuleType::NonAtomic => quote! { #[inline] #[allow(non_snake_case, unused_variables)] - pub fn #name(state: #box_ty<::pest::ParserState>) -> ::pest::ParseResult<#box_ty<::pest::ParserState>> { + pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { state.atomic(::pest::Atomicity::NonAtomic, |state| { state.rule(Rule::#name, |state| { #expr @@ -999,7 +999,7 @@ mod tests { #[inline] #[allow(dead_code, non_snake_case, unused_variables)] - pub fn skip(state: #box_ty<::pest::ParserState>) -> ::pest::ParseResult<#box_ty<::pest::ParserState>> { + pub fn skip(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { Ok(state) } } @@ -1009,13 +1009,13 @@ mod tests { #[inline] #[allow(non_snake_case, unused_variables)] - pub fn a(state: #box_ty<::pest::ParserState>) -> ::pest::ParseResult<#box_ty<::pest::ParserState>> { + pub fn a(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { state.match_string("b") } #[inline] #[allow(dead_code, non_snake_case, unused_variables)] - pub fn ANY(state: #box_ty<::pest::ParserState>) -> ::pest::ParseResult<#box_ty<::pest::ParserState>> { + pub fn ANY(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { state.skip(1) } } diff --git a/generator/src/lib.rs b/generator/src/lib.rs index 2ffcb581..2f420039 100644 --- a/generator/src/lib.rs +++ 
b/generator/src/lib.rs @@ -7,17 +7,19 @@ // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. -#![doc(html_root_url = "https://docs.rs/pest_derive")] +#![doc( + html_root_url = "https://docs.rs/pest_derive", + html_logo_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg", + html_favicon_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg" +)] +#![warn(missing_docs, rust_2018_idioms, unused_qualifications)] #![recursion_limit = "256"] +//! # pest generator +//! +//! This crate generates code from ASTs (which is used in the `pest_derive` crate). -extern crate pest; -extern crate pest_meta; - -extern crate proc_macro; -extern crate proc_macro2; #[macro_use] extern crate quote; -extern crate syn; use std::env; use std::fs::File; @@ -34,6 +36,9 @@ mod generator; use pest_meta::parser::{self, Rule}; use pest_meta::{optimizer, unwrap_or_report, validator}; +/// Processes the derive/proc macro input and generates the corresponding parser based +/// on the parsed grammar. If `include_grammar` is set to true, it'll generate an explicit +/// "include_str" statement (done in pest_derive, but turned off in the local bootstrap). pub fn derive_parser(input: TokenStream, include_grammar: bool) -> TokenStream { let ast: DeriveInput = syn::parse2(input).unwrap(); let (name, generics, content) = parse_derive(ast); diff --git a/generator/src/macros.rs b/generator/src/macros.rs index 37ef531f..377f66e6 100644 --- a/generator/src/macros.rs +++ b/generator/src/macros.rs @@ -19,7 +19,7 @@ macro_rules! generate_rule { quote! 
{ #[inline] #[allow(dead_code, non_snake_case, unused_variables)] - pub fn $name(state: ::std::boxed::Box<::pest::ParserState>) -> ::pest::ParseResult<::std::boxed::Box<::pest::ParserState>> { + pub fn $name(state: ::std::boxed::Box<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<::std::boxed::Box<::pest::ParserState<'_, Rule>>> { $pattern } } @@ -32,7 +32,7 @@ macro_rules! generate_rule { quote! { #[inline] #[allow(dead_code, non_snake_case, unused_variables)] - pub fn $name(state: ::alloc::boxed::Box<::pest::ParserState>) -> ::pest::ParseResult<::alloc::boxed::Box<::pest::ParserState>> { + pub fn $name(state: ::alloc::boxed::Box<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<::alloc::boxed::Box<::pest::ParserState<'_, Rule>>> { $pattern } } diff --git a/grammars/Cargo.toml b/grammars/Cargo.toml index 72305164..474e021f 100644 --- a/grammars/Cargo.toml +++ b/grammars/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "pest_grammars" description = "pest popular grammar implementations" -version = "2.4.0" +version = "2.4.1" edition = "2021" authors = ["Dragoș Tiselice "] -homepage = "https://pest-parser.github.io/" +homepage = "https://pest.rs/" repository = "https://github.com/pest-parser/pest" documentation = "https://docs.rs/pest" keywords = ["pest", "parser", "peg", "grammar"] @@ -14,8 +14,8 @@ readme = "_README.md" rust-version = "1.56" [dependencies] -pest = { path = "../pest", version = "2.4.0" } -pest_derive = { path = "../derive", version = "2.4.0" } +pest = { path = "../pest", version = "2.4.1" } +pest_derive = { path = "../derive", version = "2.4.1" } [dev-dependencies] criterion = "0.3" diff --git a/grammars/src/lib.rs b/grammars/src/lib.rs index 1aafb561..291fef24 100644 --- a/grammars/src/lib.rs +++ b/grammars/src/lib.rs @@ -11,14 +11,20 @@ //! //! Contains a series of default grammars. 
-#![doc(html_root_url = "https://docs.rs/pest_grammars")] +#![doc( + html_root_url = "https://docs.rs/pest_grammars", + html_logo_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg", + html_favicon_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg" +)] +#![warn(missing_docs, rust_2018_idioms, unused_qualifications)] -extern crate pest; #[macro_use] extern crate pest_derive; pub use pest::Parser; +/// Grammar rules of a simplified HTTP request parser +#[allow(missing_docs)] pub mod http { /// HTTP parser. #[derive(Parser)] @@ -26,6 +32,8 @@ pub mod http { pub struct HttpParser; } +/// Grammar rules of a sample JSON parser +#[allow(missing_docs)] pub mod json { /// JSON parser. #[derive(Parser)] @@ -33,6 +41,8 @@ pub mod json { pub struct JsonParser; } +/// Grammar rules of a sample TOML parser +#[allow(missing_docs)] pub mod toml { /// TOML parser. #[derive(Parser)] diff --git a/meta/Cargo.toml b/meta/Cargo.toml index e8f19d9c..7ebab384 100644 --- a/meta/Cargo.toml +++ b/meta/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "pest_meta" description = "pest meta language parser and validator" -version = "2.4.0" +version = "2.4.1" edition = "2021" authors = ["Dragoș Tiselice "] -homepage = "https://pest-parser.github.io/" +homepage = "https://pest.rs/" repository = "https://github.com/pest-parser/pest" documentation = "https://docs.rs/pest" keywords = ["pest", "parser", "meta", "optimizer"] @@ -16,7 +16,7 @@ include = ["Cargo.toml", "src/**/*", "src/grammar.rs", "_README.md", "LICENSE-*" rust-version = "1.56" [dependencies] -pest = { path = "../pest", version = "2.4.0" } +pest = { path = "../pest", version = "2.4.1" } once_cell = "1.8.0" [build-dependencies] diff --git a/meta/src/ast.rs b/meta/src/ast.rs index da6ee527..ffac8ea7 100644 --- a/meta/src/ast.rs +++ b/meta/src/ast.rs @@ -7,22 +7,48 @@ // option. 
All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. +//! Types for the pest's abstract syntax tree. + +/// A grammar rule #[derive(Clone, Debug, Eq, PartialEq)] pub struct Rule { + /// The name of the rule pub name: String, + /// The rule's type (silent, atomic, ...) pub ty: RuleType, + /// The rule's expression pub expr: Expr, } +/// All possible rule types #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum RuleType { + /// The normal rule type Normal, + /// Silent rules are just like normal rules + /// — when run, they function the same way — + /// except they do not produce pairs or tokens. + /// If a rule is silent, it will never appear in a parse result. + /// (their syntax is `_{ ... }`) Silent, + /// atomic rule prevent implicit whitespace: inside an atomic rule, + /// the tilde ~ means "immediately followed by", + /// and repetition operators (asterisk * and plus sign +) + /// have no implicit separation. In addition, all other rules + /// called from an atomic rule are also treated as atomic. + /// In an atomic rule, interior matching rules are silent. + /// (their syntax is `@{ ... }`) Atomic, + /// Compound atomic rules are similar to atomic rules, + /// but they produce inner tokens as normal. + /// (their syntax is `${ ... }`) CompoundAtomic, + /// Non-atomic rules cancel the effect of atomic rules. + /// (their syntax is `!{ ... }`) NonAtomic, } +/// All possible rule expressions #[derive(Clone, Debug, Eq, PartialEq)] pub enum Expr { /// Matches an exact string, e.g. `"a"` @@ -64,10 +90,12 @@ pub enum Expr { } impl Expr { + /// Returns the iterator that steps the expression from top to bottom. pub fn iter_top_down(&self) -> ExprTopDownIterator { ExprTopDownIterator::new(self) } + /// Applies `f` to the expression and all its children (top to bottom). 
pub fn map_top_down(self, mut f: F) -> Expr where F: FnMut(Expr) -> Expr, @@ -137,6 +165,7 @@ impl Expr { map_internal(self, &mut f) } + /// Applies `f` to the expression and all its children (bottom up). pub fn map_bottom_up(self, mut f: F) -> Expr where F: FnMut(Expr) -> Expr, @@ -207,6 +236,7 @@ impl Expr { } } +/// The top down iterator for an expression. pub struct ExprTopDownIterator { current: Option, next: Option, @@ -214,6 +244,7 @@ pub struct ExprTopDownIterator { } impl ExprTopDownIterator { + /// Constructs a top-down iterator from the expression. pub fn new(expr: &Expr) -> Self { let mut iter = ExprTopDownIterator { current: None, diff --git a/meta/src/lib.rs b/meta/src/lib.rs index 3b808a98..7212ee80 100644 --- a/meta/src/lib.rs +++ b/meta/src/lib.rs @@ -6,13 +6,19 @@ // license , at your // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. +//! # pest meta +//! +//! This crate parses, validates, optimizes, and converts pest's own grammars to ASTs. + +#![doc( + html_logo_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg", + html_favicon_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg" +)] +#![warn(missing_docs, rust_2018_idioms, unused_qualifications)] -extern crate once_cell; #[cfg(test)] #[macro_use] extern crate pest; -#[cfg(not(test))] -extern crate pest; use std::fmt::Display; @@ -21,6 +27,8 @@ pub mod optimizer; pub mod parser; pub mod validator; +/// A helper that will unwrap the result or panic +/// with the nicely formatted error message. pub fn unwrap_or_report(result: Result) -> T where E: IntoIterator, diff --git a/meta/src/optimizer/mod.rs b/meta/src/optimizer/mod.rs index e0cbdb0d..2038753b 100644 --- a/meta/src/optimizer/mod.rs +++ b/meta/src/optimizer/mod.rs @@ -7,6 +7,8 @@ // option. 
All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. +//! Different optimizations for pest's ASTs. + use crate::ast::*; use std::collections::HashMap; @@ -26,6 +28,7 @@ mod rotater; mod skipper; mod unroller; +/// Takes pest's ASTs and optimizes them pub fn optimize(rules: Vec) -> Vec { let optimized: Vec = rules .into_iter() @@ -87,36 +90,57 @@ fn to_hash_map(rules: &[OptimizedRule]) -> HashMap { .collect() } +/// The optimized version of the pest AST's `Rule`. #[derive(Clone, Debug, Eq, PartialEq)] pub struct OptimizedRule { + /// The name of the rule. pub name: String, + /// The type of the rule. pub ty: RuleType, + /// The optimized expression of the rule. pub expr: OptimizedExpr, } +/// The optimized version of the pest AST's `Expr`. #[derive(Clone, Debug, Eq, PartialEq)] pub enum OptimizedExpr { + /// Matches an exact string, e.g. `"a"` Str(String), + /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"` Insens(String), + /// Matches one character in the range, e.g. `'a'..'z'` Range(String, String), + /// Matches the rule with the given name, e.g. `a` Ident(String), + /// Matches a custom part of the stack, e.g. `PEEK[..]` PeekSlice(i32, Option), + /// Positive lookahead; matches expression without making progress, e.g. `&e` PosPred(Box), + /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e` NegPred(Box), + /// Matches a sequence of two expressions, e.g. `e1 ~ e2` Seq(Box, Box), + /// Matches either of two expressions, e.g. `e1 | e2` Choice(Box, Box), + /// Optionally matches an expression, e.g. `e?` Opt(Box), + /// Matches an expression zero or more times, e.g. `e*` Rep(Box), + /// Continues to match expressions until one of the strings in the `Vec` is found Skip(Vec), + /// Matches an expression and pushes it to the stack, e.g. 
`push(e)` Push(Box), + /// Restores an expression's checkpoint RestoreOnErr(Box), } impl OptimizedExpr { + /// Returns a top-down iterator over the `OptimizedExpr`. pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator { OptimizedExprTopDownIterator::new(self) } + /// Applies `f` to the `OptimizedExpr` top-down. pub fn map_top_down(self, mut f: F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr, @@ -166,6 +190,7 @@ impl OptimizedExpr { map_internal(self, &mut f) } + /// Applies `f` to the `OptimizedExpr` bottom-up. pub fn map_bottom_up(self, mut f: F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr, @@ -216,6 +241,7 @@ impl OptimizedExpr { } } +/// A top-down iterator over an `OptimizedExpr`. pub struct OptimizedExprTopDownIterator { current: Option, next: Option, @@ -223,6 +249,7 @@ pub struct OptimizedExprTopDownIterator { } impl OptimizedExprTopDownIterator { + /// Creates a new top down iterator from an `OptimizedExpr`. pub fn new(expr: &OptimizedExpr) -> Self { let mut iter = OptimizedExprTopDownIterator { current: None, diff --git a/meta/src/parser.rs b/meta/src/parser.rs index 23e2565b..35c867ce 100644 --- a/meta/src/parser.rs +++ b/meta/src/parser.rs @@ -7,6 +7,8 @@ // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. +//! Types and helpers for the pest's own grammar parser. 
+ use std::char; use std::iter::Peekable; @@ -18,31 +20,48 @@ use pest::{Parser, Span}; use crate::ast::{Expr, Rule as AstRule, RuleType}; use crate::validator; +/// TODO: fix the generator to at least add explicit lifetimes +#[allow( + missing_docs, + unused_attributes, + elided_lifetimes_in_paths, + unused_qualifications +)] mod grammar { include!("grammar.rs"); } pub use self::grammar::*; -pub fn parse(rule: Rule, data: &str) -> Result, Error> { +/// A helper that will parse using the pest grammar +pub fn parse(rule: Rule, data: &str) -> Result, Error> { PestParser::parse(rule, data) } +/// The pest grammar rule #[derive(Clone, Debug, Eq, PartialEq)] pub struct ParserRule<'i> { + /// The rule's name pub name: String, + /// The rule's span pub span: Span<'i>, + /// The rule's type pub ty: RuleType, + /// The rule's parser node pub node: ParserNode<'i>, } +/// The pest grammar node #[derive(Clone, Debug, Eq, PartialEq)] pub struct ParserNode<'i> { + /// The node's expression pub expr: ParserExpr<'i>, + /// The node's span pub span: Span<'i>, } impl<'i> ParserNode<'i> { + /// will remove nodes that do not match `f` pub fn filter_map_top_down(self, mut f: F) -> Vec where F: FnMut(ParserNode<'i>) -> Option, @@ -107,34 +126,52 @@ impl<'i> ParserNode<'i> { } } +/// All possible parser expressions #[derive(Clone, Debug, Eq, PartialEq)] pub enum ParserExpr<'i> { + /// Matches an exact string, e.g. `"a"` Str(String), + /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"` Insens(String), + /// Matches one character in the range, e.g. `'a'..'z'` Range(String, String), + /// Matches the rule with the given name, e.g. `a` Ident(String), + /// Matches a custom part of the stack, e.g. `PEEK[..]` PeekSlice(i32, Option), + /// Positive lookahead; matches expression without making progress, e.g. `&e` PosPred(Box>), + /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. 
`!e` NegPred(Box>), + /// Matches a sequence of two expressions, e.g. `e1 ~ e2` Seq(Box>, Box>), + /// Matches either of two expressions, e.g. `e1 | e2` Choice(Box>, Box>), + /// Optionally matches an expression, e.g. `e?` Opt(Box>), + /// Matches an expression zero or more times, e.g. `e*` Rep(Box>), + /// Matches an expression one or more times, e.g. `e+` RepOnce(Box>), + /// Matches an expression an exact number of times, e.g. `e{n}` RepExact(Box>, u32), + /// Matches an expression at least a number of times, e.g. `e{n,}` RepMin(Box>, u32), + /// Matches an expression at most a number of times, e.g. `e{,n}` RepMax(Box>, u32), + /// Matches an expression a number of times within a range, e.g. `e{m, n}` RepMinMax(Box>, u32, u32), + /// Matches an expression and pushes it to the stack, e.g. `push(e)` Push(Box>), } -fn convert_rule(rule: ParserRule) -> AstRule { +fn convert_rule(rule: ParserRule<'_>) -> AstRule { let ParserRule { name, ty, node, .. } = rule; let expr = convert_node(node); AstRule { name, ty, expr } } -fn convert_node(node: ParserNode) -> Expr { +fn convert_node(node: ParserNode<'_>) -> Expr { match node.expr { ParserExpr::Str(string) => Expr::Str(string), ParserExpr::Insens(string) => Expr::Insens(string), @@ -164,7 +201,8 @@ fn convert_node(node: ParserNode) -> Expr { } } -pub fn consume_rules(pairs: Pairs) -> Result, Vec>> { +/// Converts a parser's result (`Pairs`) to an AST +pub fn consume_rules(pairs: Pairs<'_, Rule>) -> Result, Vec>> { let rules = consume_rules_with_spans(pairs)?; let errors = validator::validate_ast(&rules); if errors.is_empty() { @@ -174,7 +212,9 @@ pub fn consume_rules(pairs: Pairs) -> Result, Vec } } -fn consume_rules_with_spans(pairs: Pairs) -> Result, Vec>> { +fn consume_rules_with_spans( + pairs: Pairs<'_, Rule>, +) -> Result>, Vec>> { let pratt = PrattParser::new() .op(Op::infix(Rule::choice_operator, Assoc::Left)) .op(Op::infix(Rule::sequence_operator, Assoc::Left)); @@ -1527,19 +1567,19 @@ mod tests { )); const 
ERROR: &str = "call limit reached"; pest::set_call_limit(Some(5_000usize.try_into().unwrap())); - let s1 = crate::parser::parse(crate::parser::Rule::grammar_rules, sample1); + let s1 = parse(Rule::grammar_rules, sample1); assert!(s1.is_err()); assert_eq!(s1.unwrap_err().variant.message(), ERROR); - let s2 = crate::parser::parse(crate::parser::Rule::grammar_rules, sample2); + let s2 = parse(Rule::grammar_rules, sample2); assert!(s2.is_err()); assert_eq!(s2.unwrap_err().variant.message(), ERROR); - let s3 = crate::parser::parse(crate::parser::Rule::grammar_rules, sample3); + let s3 = parse(Rule::grammar_rules, sample3); assert!(s3.is_err()); assert_eq!(s3.unwrap_err().variant.message(), ERROR); - let s4 = crate::parser::parse(crate::parser::Rule::grammar_rules, sample4); + let s4 = parse(Rule::grammar_rules, sample4); assert!(s4.is_err()); assert_eq!(s4.unwrap_err().variant.message(), ERROR); - let s5 = crate::parser::parse(crate::parser::Rule::grammar_rules, sample5); + let s5 = parse(Rule::grammar_rules, sample5); assert!(s5.is_err()); assert_eq!(s5.unwrap_err().variant.message(), ERROR); } diff --git a/meta/src/validator.rs b/meta/src/validator.rs index 45367795..4bb46ec5 100644 --- a/meta/src/validator.rs +++ b/meta/src/validator.rs @@ -7,6 +7,9 @@ // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. +//! Helpers for validating pest grammars that could help with debugging +//! and provide a more user-friendly error message. 
+ use once_cell::sync::Lazy; use std::collections::{HashMap, HashSet}; @@ -67,7 +70,12 @@ static BUILTINS: Lazy> = Lazy::new(|| { .collect::>() }); -pub fn validate_pairs(pairs: Pairs) -> Result, Vec>> { +/// The parsed grammar for common mistakes: +/// - using Rust keywords +/// - using Pest keywords +/// - duplicate rules +/// - undefined rules +pub fn validate_pairs(pairs: Pairs<'_, Rule>) -> Result, Vec>> { let definitions: Vec<_> = pairs .clone() .filter(|pair| pair.as_rule() == Rule::grammar_rule) @@ -104,8 +112,9 @@ pub fn validate_pairs(pairs: Pairs) -> Result, Vec>> Ok(defaults.cloned().collect()) } +/// Validates that the given `definitions` do not contain any Rust keywords. #[allow(clippy::ptr_arg)] -pub fn validate_rust_keywords(definitions: &Vec) -> Vec> { +pub fn validate_rust_keywords(definitions: &Vec>) -> Vec> { let mut errors = vec![]; for definition in definitions { @@ -124,8 +133,9 @@ pub fn validate_rust_keywords(definitions: &Vec) -> Vec> { errors } +/// Validates that the given `definitions` do not contain any Pest keywords. #[allow(clippy::ptr_arg)] -pub fn validate_pest_keywords(definitions: &Vec) -> Vec> { +pub fn validate_pest_keywords(definitions: &Vec>) -> Vec> { let mut errors = vec![]; for definition in definitions { @@ -144,8 +154,9 @@ pub fn validate_pest_keywords(definitions: &Vec) -> Vec> { errors } +/// Validates that the given `definitions` do not contain any duplicate rules. #[allow(clippy::ptr_arg)] -pub fn validate_already_defined(definitions: &Vec) -> Vec> { +pub fn validate_already_defined(definitions: &Vec>) -> Vec> { let mut errors = vec![]; let mut defined = HashSet::new(); @@ -167,6 +178,7 @@ pub fn validate_already_defined(definitions: &Vec) -> Vec> { errors } +/// Validates that the given `definitions` do not contain any undefined rules. 
#[allow(clippy::ptr_arg)] pub fn validate_undefined<'i>( definitions: &Vec>, @@ -191,6 +203,10 @@ pub fn validate_undefined<'i>( errors } +/// Validates the abstract syntax tree for common mistakes: +/// - infinite repetitions +/// - choices that cannot be reached +/// - left recursion #[allow(clippy::ptr_arg)] pub fn validate_ast<'a, 'i: 'a>(rules: &'a Vec>) -> Vec> { let mut errors = vec![]; diff --git a/pest/Cargo.toml b/pest/Cargo.toml index 2cb4477a..0210441b 100644 --- a/pest/Cargo.toml +++ b/pest/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "pest" description = "The Elegant Parser" -version = "2.4.0" +version = "2.4.1" edition = "2021" authors = ["Dragoș Tiselice "] -homepage = "https://pest-parser.github.io/" +homepage = "https://pest.rs/" repository = "https://github.com/pest-parser/pest" documentation = "https://docs.rs/pest" keywords = ["pest", "parser", "peg", "grammar"] diff --git a/pest/examples/parens.rs b/pest/examples/parens.rs index 566c119b..34fbb2e3 100644 --- a/pest/examples/parens.rs +++ b/pest/examples/parens.rs @@ -18,11 +18,11 @@ struct ParenParser; impl Parser for ParenParser { fn parse(rule: Rule, input: &str) -> Result, Error> { - fn expr(state: Box>) -> ParseResult>> { + fn expr(state: Box>) -> ParseResult>> { state.sequence(|s| s.repeat(paren).and_then(|s| s.end_of_input())) } - fn paren(state: Box>) -> ParseResult>> { + fn paren(state: Box>) -> ParseResult>> { state.rule(Rule::paren, |s| { s.sequence(|s| { s.match_string("(") diff --git a/pest/src/error.rs b/pest/src/error.rs index a83e23a4..88efa0f8 100644 --- a/pest/src/error.rs +++ b/pest/src/error.rs @@ -101,7 +101,7 @@ impl Error { /// /// println!("{}", error); /// ``` - pub fn new_from_pos(variant: ErrorVariant, pos: Position) -> Error { + pub fn new_from_pos(variant: ErrorVariant, pos: Position<'_>) -> Error { let visualize_ws = pos.match_char('\n') || pos.match_char('\r'); let line_of = pos.line_of(); let line = if visualize_ws { @@ -147,7 +147,7 @@ impl Error { /// /// 
println!("{}", error); /// ``` - pub fn new_from_span(variant: ErrorVariant, span: Span) -> Error { + pub fn new_from_span(variant: ErrorVariant, span: Span<'_>) -> Error { let end = span.end_pos(); let mut end_line_col = end.line_col(); // end position is after a \n, so we want to point to the visual lf symbol @@ -505,7 +505,7 @@ impl ErrorVariant { /// }; /// /// println!("{}", variant.message()); - pub fn message(&self) -> Cow { + pub fn message(&self) -> Cow<'_, str> { match self { ErrorVariant::ParsingError { ref positives, @@ -519,13 +519,13 @@ impl ErrorVariant { } impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.format()) } } impl fmt::Display for ErrorVariant { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { ErrorVariant::ParsingError { .. } => write!(f, "parsing error: {}", self.message()), ErrorVariant::CustomError { .. 
} => write!(f, "{}", self.message()), diff --git a/pest/src/iterators/flat_pairs.rs b/pest/src/iterators/flat_pairs.rs index 85171a3b..6d310272 100644 --- a/pest/src/iterators/flat_pairs.rs +++ b/pest/src/iterators/flat_pairs.rs @@ -38,7 +38,7 @@ pub unsafe fn new( input: &str, start: usize, end: usize, -) -> FlatPairs { +) -> FlatPairs<'_, R> { FlatPairs { queue, input, @@ -130,7 +130,7 @@ impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> { } impl<'i, R: RuleType> fmt::Debug for FlatPairs<'i, R> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("FlatPairs") .field("pairs", &self.clone().collect::>()) .finish() diff --git a/pest/src/iterators/pair.rs b/pest/src/iterators/pair.rs index 0a8f735a..62c95e03 100644 --- a/pest/src/iterators/pair.rs +++ b/pest/src/iterators/pair.rs @@ -52,7 +52,7 @@ pub unsafe fn new( queue: Rc>>, input: &str, start: usize, -) -> Pair { +) -> Pair<'_, R> { Pair { queue, input, @@ -268,7 +268,7 @@ impl<'i, R: RuleType> Pairs<'i, R> { } impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Pair") .field("rule", &self.as_rule()) .field("span", &self.as_span()) @@ -278,7 +278,7 @@ impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> { } impl<'i, R: RuleType> fmt::Display for Pair<'i, R> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let rule = self.as_rule(); let start = self.pos(self.start); let end = self.pos(self.pair()); diff --git a/pest/src/iterators/pairs.rs b/pest/src/iterators/pairs.rs index 654b7dad..e478cebf 100644 --- a/pest/src/iterators/pairs.rs +++ b/pest/src/iterators/pairs.rs @@ -43,7 +43,7 @@ pub fn new( input: &str, start: usize, end: usize, -) -> Pairs { +) -> Pairs<'_, R> { Pairs { queue, input, @@ -246,13 +246,13 @@ impl<'i, R: 
RuleType> DoubleEndedIterator for Pairs<'i, R> { } impl<'i, R: RuleType> fmt::Debug for Pairs<'i, R> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.clone()).finish() } } impl<'i, R: RuleType> fmt::Display for Pairs<'i, R> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "[{}]", diff --git a/pest/src/iterators/tokens.rs b/pest/src/iterators/tokens.rs index f21cf072..0d462711 100644 --- a/pest/src/iterators/tokens.rs +++ b/pest/src/iterators/tokens.rs @@ -39,7 +39,7 @@ pub fn new( input: &str, start: usize, end: usize, -) -> Tokens { +) -> Tokens<'_, R> { if cfg!(debug_assertions) { for tok in queue.iter() { match *tok { @@ -123,7 +123,7 @@ impl<'i, R: RuleType> DoubleEndedIterator for Tokens<'i, R> { } impl<'i, R: RuleType> fmt::Debug for Tokens<'i, R> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.clone()).finish() } } @@ -138,9 +138,9 @@ mod tests { #[test] fn double_ended_iter_for_tokens() { let pairs = AbcParser::parse(Rule::a, "abcde").unwrap(); - let mut tokens = pairs.clone().tokens().collect::>>(); + let mut tokens = pairs.clone().tokens().collect::>>(); tokens.reverse(); - let reverse_tokens = pairs.tokens().rev().collect::>>(); + let reverse_tokens = pairs.tokens().rev().collect::>>(); assert_eq!(tokens, reverse_tokens); } } diff --git a/pest/src/lib.rs b/pest/src/lib.rs index c2a83a32..47d82f14 100644 --- a/pest/src/lib.rs +++ b/pest/src/lib.rs @@ -7,7 +7,11 @@ // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. 
#![no_std] - +#![doc( + html_logo_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg", + html_favicon_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg" +)] +#![warn(missing_docs, rust_2018_idioms, unused_qualifications)] //! # pest. The Elegant Parser //! //! pest is a general purpose parser written in Rust with a focus on accessibility, correctness, @@ -25,12 +29,15 @@ //! //! * API reference on [docs.rs] //! * play with grammars and share them on our [fiddle] -//! * leave feedback, ask questions, or greet us on [Gitter] +//! * find previous common questions answered or ask questions on [GitHub Discussions] +//! * leave feedback, ask questions, or greet us on [Gitter] or [Discord] //! -//! [book]: https://pest-parser.github.io/book +//! [book]: https://pest.rs/book //! [docs.rs]: https://docs.rs/pest -//! [fiddle]: https://pest-parser.github.io/#editor -//! [Gitter]: https://gitter.im/dragostis/pest +//! [fiddle]: https://pest.rs/#editor +//! [Gitter]: https://gitter.im/pest-parser/pest +//! [Discord]: https://discord.gg/XEGACtWpT2 +//! [GitHub Discussions]: https://github.com/pest-parser/pest/discussions //! //! ## Usage //! @@ -61,18 +68,258 @@ //! //! [`Parser`]: trait.Parser.html //! [`pest_derive` crate]: https://docs.rs/pest_derive/ +//! +//! ## Grammar +//! +//! A grammar is a series of rules separated by whitespace, possibly containing comments. +//! +//! ### Comments +//! +//! Comments start with `//` and end at the end of the line. +//! +//! ```text +//! // a comment +//! ``` +//! +//! ### Rules +//! +//! Rules have the following form: +//! +//! ```ignore +//! name = optional_modifier { expression } +//! ``` +//! +//! The name of the rule is formed from alphanumeric characters or `_` with the condition that the +//! first character is not a digit and is used to create token pairs. When the rule starts being +//! 
parsed, the starting part of the token is being produced, with the ending part being produced +//! when the rule finishes parsing. +//! +//! The following token pair notation `a(b(), c())` denotes the tokens: start `a`, start `b`, end +//! `b`, start `c`, end `c`, end `a`. +//! +//! #### Modifiers +//! +//! Modifiers are optional and can be one of `_`, `@`, `$`, or `!`. These modifiers change the +//! behavior of the rules. +//! +//! 1. Silent (`_`) +//! +//! Silent rules do not create token pairs during parsing, nor are they error-reported. +//! +//! ```ignore +//! a = _{ "a" } +//! b = { a ~ "b" } +//! ``` +//! +//! Parsing `"ab"` produces the token pair `b()`. +//! +//! 2. Atomic (`@`) +//! +//! Atomic rules do not accept whitespace or comments within their expressions and have a +//! cascading effect on any rule they call. I.e. rules that are not atomic but are called by atomic +//! rules behave atomically. +//! +//! Any rules called by atomic rules do not generate token pairs. +//! +//! ```ignore +//! a = { "a" } +//! b = @{ a ~ "b" } +//! +//! WHITESPACE = _{ " " } +//! ``` +//! +//! Parsing `"ab"` produces the token pair `b()`, while `"a b"` produces an error. +//! +//! 3. Compound-atomic (`$`) +//! +//! Compound-atomic are identical to atomic rules with the exception that rules called by them are +//! not forbidden from generating token pairs. +//! +//! ```ignore +//! a = { "a" } +//! b = ${ a ~ "b" } +//! +//! WHITESPACE = _{ " " } +//! ``` +//! +//! Parsing `"ab"` produces the token pairs `b(a())`, while `"a b"` produces an error. +//! +//! 4. Non-atomic (`!`) +//! +//! Non-atomic are identical to normal rules with the exception that they stop the cascading effect +//! of atomic and compound-atomic rules. +//! +//! ```ignore +//! a = { "a" } +//! b = !{ a ~ "b" } +//! c = @{ b } +//! +//! WHITESPACE = _{ " " } +//! ``` +//! +//! Parsing both `"ab"` and `"a b"` produce the token pairs `c(a())`. +//! +//! #### Expressions +//! +//! 
Expressions can be either terminals or non-terminals. +//! +//! 1. Terminals +//! +//! | Terminal | Usage | +//! |------------|----------------------------------------------------------------| +//! | `"a"` | matches the exact string `"a"` | +//! | `^"a"` | matches the exact string `"a"` case insensitively (ASCII only) | +//! | `'a'..'z'` | matches one character between `'a'` and `'z'` | +//! | `a` | matches rule `a` | +//! +//! Strings and characters follow +//! [Rust's escape mechanisms](https://doc.rust-lang.org/reference/tokens.html#byte-escapes), while +//! identifiers can contain alphanumeric characters and underscores (`_`), as long as they do not +//! start with a digit. +//! +//! 2. Non-terminals +//! +//! | Non-terminal | Usage | +//! |-----------------------|------------------------------------------------------------| +//! | `(e)` | matches `e` | +//! | `e1 ~ e2` | matches the sequence `e1` `e2` | +//! | e1 \| e2 | matches either `e1` or `e2` | +//! | `e*` | matches `e` zero or more times | +//! | `e+` | matches `e` one or more times | +//! | `e{n}` | matches `e` exactly `n` times | +//! | `e{, n}` | matches `e` at most `n` times | +//! | `e{n,} ` | matches `e` at least `n` times | +//! | `e{m, n}` | matches `e` between `m` and `n` times inclusively | +//! | `e?` | optionally matches `e` | +//! | `&e` | matches `e` without making progress | +//! | `!e` | matches if `e` doesn't match without making progress | +//! | `PUSH(e)` | matches `e` and pushes its captured string down the stack  | +//! +//! where `e`, `e1`, and `e2` are expressions. +//! +//! Expressions can modify the stack only if they match the input. For example, +//! if `e1` in the compound expression `e1 | e2` does not match the input, then +//! it does not modify the stack, so `e2` sees the stack in the same state as +//! `e1` did. Repetitions and optionals (`e*`, `e+`, `e{, n}`, `e{n,}`, +//! `e{m,n}`, `e?`) can modify the stack each time `e` matches. The `!e` and `&e` +//! 
expressions are a special case; they never modify the stack. +//! Many languages have "keyword" tokens (e.g. if, for, while) as well as general +//! tokens (e.g. identifier) that match any word. In order to match a keyword, +//! generally, you may need to check that it is not immediately followed by another +//! letter or digit (otherwise it would be matched as an identifier). +//! +//! ## Special rules +//! +//! Special rules can be called within the grammar. They are: +//! +//! * `WHITESPACE` - runs between rules and sub-rules +//! * `COMMENT` - runs between rules and sub-rules +//! * `ANY` - matches exactly one `char` +//! * `SOI` - (start-of-input) matches only when a `Parser` is still at the starting position +//! * `EOI` - (end-of-input) matches only when a `Parser` has reached its end +//! * `POP` - pops a string from the stack and matches it +//! * `POP_ALL` - pops the entire state of the stack and matches it +//! * `PEEK` - peeks a string from the stack and matches it +//! * `PEEK[a..b]` - peeks part of the stack and matches it +//! * `PEEK_ALL` - peeks the entire state of the stack and matches it +//! * `DROP` - drops the top of the stack (fails to match if the stack is empty) +//! +//! `WHITESPACE` and `COMMENT` should be defined manually if needed. All other rules cannot be +//! overridden. +//! +//! ## `WHITESPACE` and `COMMENT` +//! +//! When defined, these rules get matched automatically in sequences (`~`) and repetitions +//! (`*`, `+`) between expressions. Atomic rules and those rules called by atomic rules are exempt +//! from this behavior. +//! +//! These rules should be defined so as to match one whitespace character and one comment only since +//! they are run in repetitions. +//! +//! If both `WHITESPACE` and `COMMENT` are defined, this grammar: +//! +//! ```ignore +//! a = { b ~ c } +//! ``` +//! +//! is effectively transformed into this one behind the scenes: +//! +//! ```ignore +//! 
a = { b ~ WHITESPACE* ~ (COMMENT ~ WHITESPACE*)* ~ c } +//! ``` +//! +//! ## `PUSH`, `POP`, `DROP`, and `PEEK` +//! +//! `PUSH(e)` simply pushes the captured string of the expression `e` down a stack. This stack can +//! then later be used to match grammar based on its content with `POP` and `PEEK`. +//! +//! `PEEK` always matches the string at the top of stack. So, if the stack contains `["b", "a"]` +//! (`"a"` being on top), this grammar: +//! +//! ```ignore +//! a = { PEEK } +//! ``` +//! +//! is effectively transformed into at parse time: +//! +//! ```ignore +//! a = { "a" } +//! ``` +//! +//! `POP` works the same way with the exception that it pops the string off of the stack if the +//! match worked. With the stack from above, if `POP` matches `"a"`, the stack will be mutated +//! to `["b"]`. +//! +//! `DROP` makes it possible to remove the string at the top of the stack +//! without matching it. If the stack is nonempty, `DROP` drops the top of the +//! stack. If the stack is empty, then `DROP` fails to match. +//! +//! ### Advanced peeking +//! +//! `PEEK[start..end]` and `PEEK_ALL` allow to peek deeper into the stack. The syntax works exactly +//! like Rust’s exclusive slice syntax. Additionally, negative indices can be used to indicate an +//! offset from the top. If the end lies before or at the start, the expression matches (as does +//! a `PEEK_ALL` on an empty stack). With the stack `["c", "b", "a"]` (`"a"` on top): +//! +//! ```ignore +//! fill = PUSH("c") ~ PUSH("b") ~ PUSH("a") +//! v = { PEEK_ALL } = { "a" ~ "b" ~ "c" } // top to bottom +//! w = { PEEK[..] } = { "c" ~ "b" ~ "a" } // bottom to top +//! x = { PEEK[1..2] } = { PEEK[1..-1] } = { "b" } +//! y = { PEEK[..-2] } = { PEEK[0..1] } = { "a" } +//! z = { PEEK[1..] } = { PEEK[-2..3] } = { "c" ~ "b" } +//! n = { PEEK[2..-2] } = { PEEK[2..1] } = { "" } +//! ``` +//! +//! For historical reasons, `PEEK_ALL` matches from top to bottom, while `PEEK[start..end]` matches +//! from bottom to top. 
There is currently no syntax to match a slice of the stack top to bottom. +//! +//! ## `Rule` +//! +//! All rules defined or used in the grammar populate a generated `enum` called `Rule`. This +//! implements `pest`'s `RuleType` and can be used throughout the API. +//! +//! ## `Built-in rules` +//! +//! Pest also comes with a number of built-in rules for convenience. They are: +//! +//! * `ASCII_DIGIT` - matches a numeric character from 0..9 +//! * `ASCII_NONZERO_DIGIT` - matches a numeric character from 1..9 +//! * `ASCII_BIN_DIGIT` - matches a numeric character from 0..1 +//! * `ASCII_OCT_DIGIT` - matches a numeric character from 0..7 +//! * `ASCII_HEX_DIGIT` - matches a numeric character from 0..9 or a..f or A..F +//! * `ASCII_ALPHA_LOWER` - matches a character from a..z +//! * `ASCII_ALPHA_UPPER` - matches a character from A..Z +//! * `ASCII_ALPHA` - matches a character from a..z or A..Z +//! * `ASCII_ALPHANUMERIC` - matches a character from a..z or A..Z or 0..9 +//! * `ASCII` - matches a character from \x00..\x7f +//! 
* `NEWLINE` - matches either "\n" or "\r\n" or "\r" #![doc(html_root_url = "https://docs.rs/pest")] extern crate alloc; #[cfg(feature = "std")] extern crate std; -extern crate ucd_trie; - -#[cfg(feature = "pretty-print")] -extern crate serde; -#[cfg(feature = "pretty-print")] -extern crate serde_json; pub use crate::parser::Parser; pub use crate::parser_state::{ diff --git a/pest/src/macros.rs b/pest/src/macros.rs index 6f5c4c6e..0ca6d9e0 100644 --- a/pest/src/macros.rs +++ b/pest/src/macros.rs @@ -334,7 +334,7 @@ pub mod tests { pub struct AbcParser; impl Parser for AbcParser { - fn parse(_: Rule, input: &str) -> Result, Error> { + fn parse(_: Rule, input: &str) -> Result, Error> { state(input, |state| { state .rule(Rule::a, |s| { diff --git a/pest/src/parser.rs b/pest/src/parser.rs index 8dd38141..caf71264 100644 --- a/pest/src/parser.rs +++ b/pest/src/parser.rs @@ -14,5 +14,5 @@ use crate::RuleType; /// A trait with a single method that parses strings. pub trait Parser { /// Parses a `&str` starting from `rule`. - fn parse(rule: R, input: &str) -> Result, Error>; + fn parse(rule: R, input: &str) -> Result, Error>; } diff --git a/pest/src/parser_state.rs b/pest/src/parser_state.rs index a4422e86..f710d8b2 100644 --- a/pest/src/parser_state.rs +++ b/pest/src/parser_state.rs @@ -28,8 +28,24 @@ use crate::RuleType; /// [`ParserState`]: struct.ParserState.html #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Lookahead { + /// The positive predicate, written as an ampersand &, + /// attempts to match its inner expression. + /// If the inner expression succeeds, parsing continues, + /// but at the same position as the predicate — + /// &foo ~ bar is thus a kind of "AND" statement: + /// "the input string must match foo AND bar". + /// If the inner expression fails, + /// the whole expression fails too. Positive, + /// The negative predicate, written as an exclamation mark !, + /// attempts to match its inner expression. 
+ /// If the inner expression fails, the predicate succeeds + /// and parsing continues at the same position as the predicate. + /// If the inner expression succeeds, the predicate fails — + /// !foo ~ bar is thus a kind of "NOT" statement: + /// "the input string must match bar but NOT foo". Negative, + /// No lookahead (i.e. it will consume input). None, } @@ -38,8 +54,16 @@ pub enum Lookahead { /// [`ParserState`]: struct.ParserState.html #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Atomicity { + /// prevents implicit whitespace: inside an atomic rule, + /// the tilde ~ means "immediately followed by", + /// and repetition operators (asterisk * and plus sign +) + /// have no implicit separation. In addition, all other rules + /// called from an atomic rule are also treated as atomic. + /// (interior matching rules are silent) Atomic, + /// The same as atomic, but inner tokens are produced as normal. CompoundAtomic, + /// implicit whitespace is enabled NonAtomic, } @@ -49,7 +73,9 @@ pub type ParseResult = Result; /// Match direction for the stack. Used in `PEEK[a..b]`/`stack_match_peek_slice`. 
#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum MatchDir { + /// from the bottom to the top of the stack BottomToTop, + /// from the top to the bottom of the stack TopToBottom, } @@ -195,7 +221,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// } /// /// let input = "ab"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let position = state.position(); /// assert_eq!(position.pos(), 0); /// ``` @@ -217,7 +243,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// } /// /// let input = "ab"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let atomicity = state.atomicity(); /// assert_eq!(atomicity, Atomicity::NonAtomic); /// ``` @@ -464,7 +490,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// } /// /// let input = "aab"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state.repeat(|s| { /// s.match_string("a") /// }); @@ -508,7 +534,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// } /// /// let input = "ab"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let result = state.optional(|s| { /// s.match_string("ab") /// }); @@ -544,13 +570,13 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "ab"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let result = state.match_char_by(|c| c.is_ascii()); /// assert!(result.is_ok()); /// assert_eq!(result.unwrap().position().pos(), 1); /// /// let input = "❤"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let result = state.match_char_by(|c| c.is_ascii()); /// assert!(result.is_err()); /// 
assert_eq!(result.unwrap_err().position().pos(), 0); @@ -579,7 +605,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "ab"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state.match_string("ab"); /// assert!(result.is_ok()); /// assert_eq!(result.unwrap().position().pos(), 2); @@ -610,7 +636,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "ab"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state.match_insensitive("AB"); /// assert!(result.is_ok()); /// assert_eq!(result.unwrap().position().pos(), 2); @@ -641,7 +667,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "ab"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state.match_range('a'..'z'); /// assert!(result.is_ok()); /// assert_eq!(result.unwrap().position().pos(), 1); @@ -672,7 +698,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "ab"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state.skip(1); /// assert!(result.is_ok()); /// assert_eq!(result.unwrap().position().pos(), 1); @@ -703,7 +729,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "abcd"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state.skip_until(&["c", "d"]); /// assert!(result.is_ok()); /// assert_eq!(result.unwrap().position().pos(), 2); @@ -726,7 +752,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "ab"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let 
mut state: Box> = pest::ParserState::new(input); /// let mut result = state.start_of_input(); /// assert!(result.is_ok()); /// @@ -756,7 +782,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "ab"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state.end_of_input(); /// assert!(result.is_err()); /// @@ -910,7 +936,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "ab"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state.stack_push(|state| state.match_string("a")); /// assert!(result.is_ok()); /// assert_eq!(result.unwrap().position().pos(), 1); @@ -947,7 +973,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "aa"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state.stack_push(|state| state.match_string("a")).and_then( /// |state| state.stack_peek() /// ); @@ -976,7 +1002,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "aa"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state.stack_push(|state| state.match_string("a")).and_then( /// |state| state.stack_pop() /// ); @@ -1004,7 +1030,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "abcd cd cb"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state /// .stack_push(|state| state.match_string("a")) /// .and_then(|state| state.stack_push(|state| state.match_string("b"))) @@ -1036,7 +1062,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { let mut position = self.position; let result = { let mut 
iter_b2t = self.stack[range].iter(); - let matcher = |span: &Span| position.match_string(span.as_str()); + let matcher = |span: &Span<'_>| position.match_string(span.as_str()); match match_dir { MatchDir::BottomToTop => iter_b2t.all(matcher), MatchDir::TopToBottom => iter_b2t.rev().all(matcher), @@ -1061,7 +1087,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "abba"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state /// .stack_push(|state| state.match_string("a")) /// .and_then(|state| { state.stack_push(|state| state.match_string("b")) }) @@ -1085,7 +1111,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "aaaa"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(|state| { /// state.stack_push(|state| state.match_string("a")) /// }).and_then(|state| state.stack_match_peek()); @@ -1123,7 +1149,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "aa"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state.stack_push(|state| state.match_string("a")).and_then( /// |state| state.stack_drop() /// ); @@ -1150,7 +1176,7 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// enum Rule {} /// /// let input = "ab"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state.restore_on_err(|state| state.stack_push(|state| /// state.match_string("a")).and_then(|state| state.match_string("a")) /// ); diff --git a/pest/src/position.rs b/pest/src/position.rs index fa10e67b..41574a81 100644 --- a/pest/src/position.rs +++ b/pest/src/position.rs @@ -32,7 +32,7 @@ impl<'i> 
Position<'i> { /// # Safety: /// /// `input[pos..]` must be a valid codepoint boundary (should not panic when indexing thus). - pub(crate) unsafe fn new_unchecked(input: &str, pos: usize) -> Position { + pub(crate) unsafe fn new_unchecked(input: &str, pos: usize) -> Position<'_> { debug_assert!(input.get(pos..).is_some()); Position { input, pos } } @@ -49,7 +49,7 @@ impl<'i> Position<'i> { /// assert_eq!(Position::new(heart, 1), None); /// assert_ne!(Position::new(heart, cheart.len_utf8()), None); /// ``` - pub fn new(input: &str, pos: usize) -> Option { + pub fn new(input: &str, pos: usize) -> Option> { input.get(pos..).map(|_| Position { input, pos }) } @@ -125,7 +125,7 @@ impl<'i> Position<'i> { /// enum Rule {} /// /// let input = "\na"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state.match_string("\na"); /// assert!(result.is_ok()); /// assert_eq!(result.unwrap().position().line_col(), (2, 2)); @@ -156,7 +156,7 @@ impl<'i> Position<'i> { /// enum Rule {} /// /// let input = "\na"; - /// let mut state: Box> = pest::ParserState::new(input); + /// let mut state: Box> = pest::ParserState::new(input); /// let mut result = state.match_string("\na"); /// assert!(result.is_ok()); /// assert_eq!(result.unwrap().position().line_of(), "a"); @@ -364,7 +364,7 @@ impl<'i> Position<'i> { } impl<'i> fmt::Debug for Position<'i> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Position").field("pos", &self.pos).finish() } } diff --git a/pest/src/pratt_parser.rs b/pest/src/pratt_parser.rs index fb6be851..be75f7fa 100644 --- a/pest/src/pratt_parser.rs +++ b/pest/src/pratt_parser.rs @@ -265,7 +265,7 @@ type InfixFn<'i, R, T> = Box, T) -> T + 'i>; /// [`PrattParser`]: struct.PrattParser.html pub struct PrattParserMap<'pratt, 'i, R, F, T> where - R: RuleType + 'pratt, + R: RuleType, F: FnMut(Pair<'i, 
R>) -> T, { pratt: &'pratt PrattParser, @@ -308,6 +308,14 @@ where self } + /// The last method to call on the provided pairs to execute the Pratt + /// parser (previously defined using [`map_primary`], [`map_prefix`], [`map_postfix`], + /// and [`map_infix`] methods). + /// + /// [`map_primary`]: struct.PrattParser.html#method.map_primary + /// [`map_prefix`]: struct.PrattParserMap.html#method.map_prefix + /// [`map_postfix`]: struct.PrattParserMap.html#method.map_postfix + /// [`map_infix`]: struct.PrattParserMap.html#method.map_infix pub fn parse>>(&mut self, pairs: P) -> T { self.expr(&mut pairs.peekable(), 0) } diff --git a/pest/src/span.rs b/pest/src/span.rs index 4007049c..7603c43c 100644 --- a/pest/src/span.rs +++ b/pest/src/span.rs @@ -38,7 +38,7 @@ impl<'i> Span<'i> { /// # Safety /// /// `input[start..end]` must be a valid subslice; that is, said indexing should not panic. - pub(crate) unsafe fn new_unchecked(input: &str, start: usize, end: usize) -> Span { + pub(crate) unsafe fn new_unchecked(input: &str, start: usize, end: usize) -> Span<'_> { debug_assert!(input.get(start..end).is_some()); Span { input, start, end } } @@ -54,7 +54,7 @@ impl<'i> Span<'i> { /// assert_eq!(None, Span::new(input, 100, 0)); /// assert!(Span::new(input, 0, input.len()).is_some()); /// ``` - pub fn new(input: &str, start: usize, end: usize) -> Option { + pub fn new(input: &str, start: usize, end: usize) -> Option> { if input.get(start..end).is_some() { Some(Span { input, start, end }) } else { @@ -200,7 +200,7 @@ impl<'i> Span<'i> { /// enum Rule {} /// /// let input = "abc"; - /// let mut state: Box> = pest::ParserState::new(input).skip(1).unwrap(); + /// let mut state: Box> = pest::ParserState::new(input).skip(1).unwrap(); /// let start_pos = state.position().clone(); /// state = state.match_string("b").unwrap(); /// let span = start_pos.span(&state.position().clone()); @@ -223,14 +223,14 @@ impl<'i> Span<'i> { /// enum Rule {} /// /// let input = "a\nb\nc"; - /// let mut 
state: Box> = pest::ParserState::new(input).skip(2).unwrap(); + /// let mut state: Box> = pest::ParserState::new(input).skip(2).unwrap(); /// let start_pos = state.position().clone(); /// state = state.match_string("b\nc").unwrap(); /// let span = start_pos.span(&state.position().clone()); /// assert_eq!(span.lines().collect::>(), vec!["b\n", "c"]); /// ``` #[inline] - pub fn lines(&self) -> Lines { + pub fn lines(&self) -> Lines<'_> { Lines { inner: self.lines_span(), } @@ -248,13 +248,13 @@ impl<'i> Span<'i> { /// enum Rule {} /// /// let input = "a\nb\nc"; - /// let mut state: Box> = pest::ParserState::new(input).skip(2).unwrap(); + /// let mut state: Box> = pest::ParserState::new(input).skip(2).unwrap(); /// let start_pos = state.position().clone(); /// state = state.match_string("b\nc").unwrap(); /// let span = start_pos.span(&state.position().clone()); /// assert_eq!(span.lines_span().collect::>(), vec![Span::new(input, 2, 4).unwrap(), Span::new(input, 4, 5).unwrap()]); /// ``` - pub fn lines_span(&self) -> LinesSpan { + pub fn lines_span(&self) -> LinesSpan<'_> { LinesSpan { span: self, pos: self.start, @@ -263,7 +263,7 @@ impl<'i> Span<'i> { } impl<'i> fmt::Debug for Span<'i> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Span") .field("str", &self.as_str()) .field("start", &self.start) diff --git a/pest/src/token.rs b/pest/src/token.rs index 68cb1a6e..6ca10c5b 100644 --- a/pest/src/token.rs +++ b/pest/src/token.rs @@ -13,7 +13,17 @@ use crate::position::Position; #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub enum Token<'i, R> { /// The starting `Position` of a matched `Rule` - Start { rule: R, pos: Position<'i> }, + Start { + /// matched rule + rule: R, + /// starting position + pos: Position<'i>, + }, /// The ending `Position` of a matched `Rule` - End { rule: R, pos: Position<'i> }, + End { + /// matched rule + rule: R, + /// ending position + pos: Position<'i>, + 
}, } diff --git a/pest/tests/calculator.rs b/pest/tests/calculator.rs index 7935877a..c5270550 100644 --- a/pest/tests/calculator.rs +++ b/pest/tests/calculator.rs @@ -33,7 +33,9 @@ struct CalculatorParser; impl Parser for CalculatorParser { fn parse(rule: Rule, input: &str) -> Result, Error> { - fn expression(state: Box>) -> ParseResult>> { + fn expression( + state: Box>, + ) -> ParseResult>> { state.rule(Rule::expression, |s| { s.sequence(|s| { primary(s).and_then(|s| { @@ -53,7 +55,7 @@ impl Parser for CalculatorParser { }) } - fn primary(state: Box>) -> ParseResult>> { + fn primary(state: Box>) -> ParseResult>> { state .sequence(|s| { s.match_string("(") @@ -63,7 +65,7 @@ impl Parser for CalculatorParser { .or_else(number) } - fn number(state: Box>) -> ParseResult>> { + fn number(state: Box>) -> ParseResult>> { state.rule(Rule::number, |s| { s.sequence(|s| { s.optional(|s| s.match_string("-")).and_then(|s| { @@ -78,27 +80,27 @@ impl Parser for CalculatorParser { }) } - fn plus(state: Box>) -> ParseResult>> { + fn plus(state: Box>) -> ParseResult>> { state.rule(Rule::plus, |s| s.match_string("+")) } - fn minus(state: Box>) -> ParseResult>> { + fn minus(state: Box>) -> ParseResult>> { state.rule(Rule::minus, |s| s.match_string("-")) } - fn times(state: Box>) -> ParseResult>> { + fn times(state: Box>) -> ParseResult>> { state.rule(Rule::times, |s| s.match_string("*")) } - fn divide(state: Box>) -> ParseResult>> { + fn divide(state: Box>) -> ParseResult>> { state.rule(Rule::divide, |s| s.match_string("/")) } - fn modulus(state: Box>) -> ParseResult>> { + fn modulus(state: Box>) -> ParseResult>> { state.rule(Rule::modulus, |s| s.match_string("%")) } - fn power(state: Box>) -> ParseResult>> { + fn power(state: Box>) -> ParseResult>> { state.rule(Rule::power, |s| s.match_string("^")) } diff --git a/pest/tests/json.rs b/pest/tests/json.rs index b9338c34..b66f39df 100644 --- a/pest/tests/json.rs +++ b/pest/tests/json.rs @@ -39,11 +39,11 @@ struct JsonParser; impl Parser 
for JsonParser { fn parse(rule: Rule, input: &str) -> Result, Error> { - fn json(state: Box>) -> ParseResult>> { + fn json(state: Box>) -> ParseResult>> { value(state) } - fn object(state: Box>) -> ParseResult>> { + fn object(state: Box>) -> ParseResult>> { state.rule(Rule::object, |s| { s.sequence(|s| { s.match_string("{") @@ -72,7 +72,7 @@ impl Parser for JsonParser { }) } - fn pair(state: Box>) -> ParseResult>> { + fn pair(state: Box>) -> ParseResult>> { state.rule(Rule::pair, |s| { s.sequence(|s| { string(s) @@ -84,7 +84,7 @@ impl Parser for JsonParser { }) } - fn array(state: Box>) -> ParseResult>> { + fn array(state: Box>) -> ParseResult>> { state.rule(Rule::array, |s| { s.sequence(|s| { s.match_string("[") @@ -113,7 +113,7 @@ impl Parser for JsonParser { }) } - fn value(state: Box>) -> ParseResult>> { + fn value(state: Box>) -> ParseResult>> { state.rule(Rule::value, |s| { string(s) .or_else(number) @@ -124,7 +124,7 @@ impl Parser for JsonParser { }) } - fn string(state: Box>) -> ParseResult>> { + fn string(state: Box>) -> ParseResult>> { state.rule(Rule::string, |s| { s.match_string("\"") .and_then(|s| { @@ -143,7 +143,7 @@ impl Parser for JsonParser { }) } - fn escape(state: Box>) -> ParseResult>> { + fn escape(state: Box>) -> ParseResult>> { state.sequence(|s| { s.match_string("\\").and_then(|s| { s.match_string("\"") @@ -159,7 +159,7 @@ impl Parser for JsonParser { }) } - fn unicode(state: Box>) -> ParseResult>> { + fn unicode(state: Box>) -> ParseResult>> { state.sequence(|s| { s.match_string("u") .and_then(hex) @@ -168,14 +168,14 @@ impl Parser for JsonParser { }) } - fn hex(state: Box>) -> ParseResult>> { + fn hex(state: Box>) -> ParseResult>> { state .match_range('0'..'9') .or_else(|s| s.match_range('a'..'f')) .or_else(|s| s.match_range('A'..'F')) } - fn number(state: Box>) -> ParseResult>> { + fn number(state: Box>) -> ParseResult>> { state.rule(Rule::number, |s| { s.sequence(|s| { s.optional(|s| s.match_string("-")) @@ -195,7 +195,7 @@ impl Parser 
for JsonParser { }) } - fn int(state: Box>) -> ParseResult>> { + fn int(state: Box>) -> ParseResult>> { state.match_string("0").or_else(|s| { s.sequence(|s| { s.match_range('1'..'9') @@ -204,7 +204,7 @@ impl Parser for JsonParser { }) } - fn exp(state: Box>) -> ParseResult>> { + fn exp(state: Box>) -> ParseResult>> { state.sequence(|s| { s.match_string("E") .or_else(|s| s.match_string("e")) @@ -215,17 +215,17 @@ impl Parser for JsonParser { }) } - fn bool(state: Box>) -> ParseResult>> { + fn bool(state: Box>) -> ParseResult>> { state.rule(Rule::bool, |s| { s.match_string("true").or_else(|s| s.match_string("false")) }) } - fn null(state: Box>) -> ParseResult>> { + fn null(state: Box>) -> ParseResult>> { state.rule(Rule::null, |s| s.match_string("null")) } - fn skip(state: Box>) -> ParseResult>> { + fn skip(state: Box>) -> ParseResult>> { state.repeat(|s| { s.match_string(" ") .or_else(|s| s.match_string("\t")) diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 1d07e4a7..3912a32c 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "pest_vm" description = "pest grammar virtual machine" -version = "2.4.0" +version = "2.4.1" edition = "2021" authors = ["Dragoș Tiselice "] -homepage = "https://pest-parser.github.io/" +homepage = "https://pest.rs/" repository = "https://github.com/pest-parser/pest" documentation = "https://docs.rs/pest" keywords = ["pest", "vm"] @@ -14,5 +14,5 @@ readme = "_README.md" rust-version = "1.56" [dependencies] -pest = { path = "../pest", version = "2.4.0" } -pest_meta = { path = "../meta", version = "2.4.0" } +pest = { path = "../pest", version = "2.4.1" } +pest_meta = { path = "../meta", version = "2.4.1" } diff --git a/vm/src/lib.rs b/vm/src/lib.rs index c7a15b7d..7bd121d0 100644 --- a/vm/src/lib.rs +++ b/vm/src/lib.rs @@ -6,9 +6,15 @@ // license , at your // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. +//! # pest vm +//! +//! 
This crate run ASTs on-the-fly and is used by the fiddle and debugger. -extern crate pest; -extern crate pest_meta; +#![doc( + html_logo_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg", + html_favicon_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg" +)] +#![warn(missing_docs, rust_2018_idioms, unused_qualifications)] use pest::error::Error; use pest::iterators::Pairs; @@ -21,16 +27,19 @@ use std::collections::HashMap; mod macros; +/// A virtual machine-like construct that runs an AST on-the-fly pub struct Vm { rules: HashMap, } impl Vm { + /// Creates a new `Vm` from optimized rules pub fn new(rules: Vec) -> Vm { let rules = rules.into_iter().map(|r| (r.name.clone(), r)).collect(); Vm { rules } } + /// Runs a parser rule on an input pub fn parse<'a, 'i>( &'a self, rule: &'a str, diff --git a/vm/src/macros.rs b/vm/src/macros.rs index ab641bdb..ec8e6c61 100644 --- a/vm/src/macros.rs +++ b/vm/src/macros.rs @@ -127,6 +127,7 @@ macro_rules! consumes_to { }; } +/// A macro that tests input parses to the expected tokens. #[macro_export] macro_rules! parses_to { ( parser: $parser:expr, input: $string:expr, rule: $rule:expr, @@ -171,6 +172,7 @@ macro_rules! parses_to { }; } +/// A macro that tests input fails to parse. #[macro_export] macro_rules! fails_with { ( parser: $parser:expr, input: $string:expr, rule: $rule:expr,