Skip to content

Commit

Permalink
Allow use of rust keywords as pest rules (#750)
Browse files Browse the repository at this point in the history
* Allow use of rust keywords as rule names

This prefixes all non-builtin rules with `r#` to allow the use of Rust keywords as pest rules.

This refactors code to use `format_ident!` rather than `Ident::new` (with `format!` internally in some cases), as it does the same thing internally. `Span` is defined to fall back to `Span::call_site()` when given a non-identifier, so there shouldn't be any issues.

Updated the `generate_complete` test case and removed the Rust keyword restriction from the validator.

* Test fixes and missed simplifications

* Updated tests that use identifiers to escape them with `r#`

* Changed the previously added `generate_complete` `r#if` test to ensure using an identifier as an expression is properly emitted

* Removed some redundant Ident::new() inside of `format_ident!` calls

* `fmt` & Revert removal of `validate_rust_keywords`

* Remove unused `Span` import

* Fix whitespace/comment issue

`generate_rule` was checking the name after formatting for whether it was `WHITESPACE` or `COMMENT`.

Re-imported proc_macro2::Span for tests since they still use it.
  • Loading branch information
DvvCz committed Dec 14, 2022
1 parent 1e40766 commit 2c47201
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 40 deletions.
59 changes: 38 additions & 21 deletions generator/src/generator.rs
Expand Up @@ -9,7 +9,7 @@

use std::path::PathBuf;

use proc_macro2::{Span, TokenStream};
use proc_macro2::TokenStream;
use quote::{ToTokens, TokenStreamExt};
use syn::{self, Generics, Ident};

Expand Down Expand Up @@ -169,7 +169,7 @@ fn generate_builtin_rules() -> Vec<(&'static str, TokenStream)> {

// Needed because Cargo doesn't watch for changes in grammars.
fn generate_include(name: &Ident, path: &str) -> TokenStream {
let const_name = Ident::new(&format!("_PEST_GRAMMAR_{}", name), Span::call_site());
let const_name = format_ident!("_PEST_GRAMMAR_{}", name);
// Need to make this relative to the current directory since the path to the file
// is derived from the CARGO_MANIFEST_DIR environment variable
let mut current_dir = std::env::current_dir().expect("Unable to get current directory");
Expand All @@ -182,9 +182,7 @@ fn generate_include(name: &Ident, path: &str) -> TokenStream {
}

fn generate_enum(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream {
let rules = rules
.iter()
.map(|rule| Ident::new(rule.name.as_str(), Span::call_site()));
let rules = rules.iter().map(|rule| format_ident!("r#{}", rule.name));
if uses_eoi {
quote! {
#[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)]
Expand All @@ -209,7 +207,7 @@ fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream {
let mut rules: Vec<TokenStream> = rules
.iter()
.map(|rule| {
let rule = Ident::new(rule.name.as_str(), Span::call_site());
let rule = format_ident!("r#{}", rule.name);
quote! {
Rule::#rule => rules::#rule(state)
}
Expand All @@ -228,10 +226,10 @@ fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream {
}

fn generate_rule(rule: OptimizedRule) -> TokenStream {
let name = Ident::new(&rule.name, Span::call_site());
let name = format_ident!("r#{}", rule.name);
let expr = if rule.ty == RuleType::Atomic || rule.ty == RuleType::CompoundAtomic {
generate_expr_atomic(rule.expr)
} else if name == "WHITESPACE" || name == "COMMENT" {
} else if rule.name == "WHITESPACE" || rule.name == "COMMENT" {
let atomic = generate_expr_atomic(rule.expr);

quote! {
Expand Down Expand Up @@ -364,7 +362,7 @@ fn generate_expr(expr: OptimizedExpr) -> TokenStream {
}
}
OptimizedExpr::Ident(ident) => {
let ident = Ident::new(&ident, Span::call_site());
let ident = format_ident!("r#{}", ident);
quote! { self::#ident(state) }
}
OptimizedExpr::PeekSlice(start, end_) => {
Expand Down Expand Up @@ -510,7 +508,7 @@ fn generate_expr_atomic(expr: OptimizedExpr) -> TokenStream {
}
}
OptimizedExpr::Ident(ident) => {
let ident = Ident::new(&ident, Span::call_site());
let ident = format_ident!("r#{}", ident);
quote! { self::#ident(state) }
}
OptimizedExpr::PeekSlice(start, end_) => {
Expand Down Expand Up @@ -659,6 +657,8 @@ fn option_type() -> TokenStream {

#[cfg(test)]
mod tests {
use proc_macro2::Span;

use super::*;

#[test]
Expand All @@ -675,7 +675,7 @@ mod tests {
#[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Rule {
f
r#f
}
}
.to_string()
Expand Down Expand Up @@ -863,7 +863,7 @@ mod tests {
assert_eq!(
generate_expr(expr).to_string(),
quote! {
self::a(state).or_else(|state| {
self::r#a(state).or_else(|state| {
state.sequence(|state| {
state.match_range('a'..'b').and_then(|state| {
super::hidden::skip(state)
Expand Down Expand Up @@ -929,7 +929,7 @@ mod tests {
assert_eq!(
generate_expr_atomic(expr).to_string(),
quote! {
self::a(state).or_else(|state| {
self::r#a(state).or_else(|state| {
state.sequence(|state| {
state.match_range('a'..'b').and_then(|state| {
state.lookahead(false, |state| {
Expand Down Expand Up @@ -960,11 +960,20 @@ mod tests {
fn generate_complete() {
let name = Ident::new("MyParser", Span::call_site());
let generics = Generics::default();
let rules = vec![OptimizedRule {
name: "a".to_owned(),
ty: RuleType::Silent,
expr: OptimizedExpr::Str("b".to_owned()),
}];

let rules = vec![
OptimizedRule {
name: "a".to_owned(),
ty: RuleType::Silent,
expr: OptimizedExpr::Str("b".to_owned()),
},
OptimizedRule {
name: "if".to_owned(),
ty: RuleType::Silent,
expr: OptimizedExpr::Ident("a".to_owned()),
},
];

let defaults = vec!["ANY"];
let result = result_type();
let box_ty = box_type();
Expand All @@ -980,7 +989,8 @@ mod tests {
#[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Rule {
a
r#a,
r#if
}

#[allow(clippy::all)]
Expand Down Expand Up @@ -1009,10 +1019,16 @@ mod tests {

#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn a(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
pub fn r#a(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
state.match_string("b")
}

#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn r#if(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
self::r#a(state)
}

#[inline]
#[allow(dead_code, non_snake_case, unused_variables)]
pub fn ANY(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
Expand All @@ -1025,7 +1041,8 @@ mod tests {

::pest::state(input, |state| {
match rule {
Rule::a => rules::a(state)
Rule::r#a => rules::r#a(state),
Rule::r#if => rules::r#if(state)
}
})
}
Expand Down
20 changes: 2 additions & 18 deletions meta/src/validator.rs
Expand Up @@ -71,7 +71,6 @@ static BUILTINS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
});

/// It checks the parsed grammar for common mistakes:
/// - using Rust keywords
/// - using Pest keywords
/// - duplicate rules
/// - undefined rules
Expand All @@ -84,6 +83,7 @@ pub fn validate_pairs(pairs: Pairs<'_, Rule>) -> Result<Vec<&str>, Vec<Error<Rul
.filter(|pair| pair.as_rule() == Rule::grammar_rule)
.map(|pair| pair.into_inner().next().unwrap().as_span())
.collect();

let called_rules: Vec<_> = pairs
.clone()
.filter(|pair| pair.as_rule() == Rule::grammar_rule)
Expand All @@ -98,7 +98,6 @@ pub fn validate_pairs(pairs: Pairs<'_, Rule>) -> Result<Vec<&str>, Vec<Error<Rul

let mut errors = vec![];

errors.extend(validate_rust_keywords(&definitions));
errors.extend(validate_pest_keywords(&definitions));
errors.extend(validate_already_defined(&definitions));
errors.extend(validate_undefined(&definitions, &called_rules));
Expand All @@ -117,6 +116,7 @@ pub fn validate_pairs(pairs: Pairs<'_, Rule>) -> Result<Vec<&str>, Vec<Error<Rul

/// Validates that the given `definitions` do not contain any Rust keywords.
#[allow(clippy::ptr_arg)]
#[deprecated = "Rust keywords are no longer restricted from the pest grammar"]
pub fn validate_rust_keywords(definitions: &Vec<Span<'_>>) -> Vec<Error<Rule>> {
let mut errors = vec![];

Expand Down Expand Up @@ -507,22 +507,6 @@ mod tests {
#[test]
#[should_panic(expected = "grammar error
--> 1:1
|
1 | let = { \"a\" }
| ^-^
|
= let is a rust keyword")]
fn rust_keyword() {
let input = "let = { \"a\" }";
unwrap_or_report(validate_pairs(
PestParser::parse(Rule::grammar_rules, input).unwrap(),
));
}

#[test]
#[should_panic(expected = "grammar error
--> 1:1
|
1 | ANY = { \"a\" }
Expand Down
2 changes: 1 addition & 1 deletion vm/src/lib.rs
Expand Up @@ -126,7 +126,7 @@ impl Vm {
};

if let Some(rule) = self.rules.get(rule) {
if &rule.name == "WHITESPACE" || &rule.name == "COMMENT" {
if rule.name == "WHITESPACE" || rule.name == "COMMENT" {
match rule.ty {
RuleType::Normal => state.rule(&rule.name, |state| {
state.atomic(Atomicity::Atomic, |state| {
Expand Down

0 comments on commit 2c47201

Please sign in to comment.