Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Allow use of Rust keywords as pest rules #750

Merged
merged 5 commits on Dec 14, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
39 changes: 27 additions & 12 deletions generator/src/generator.rs
Expand Up @@ -169,7 +169,7 @@ fn generate_builtin_rules() -> Vec<(&'static str, TokenStream)> {

// Needed because Cargo doesn't watch for changes in grammars.
fn generate_include(name: &Ident, path: &str) -> TokenStream {
let const_name = Ident::new(&format!("_PEST_GRAMMAR_{}", name), Span::call_site());
let const_name = format_ident!("_PEST_GRAMMAR_{}", name);
// Need to make this relative to the current directory since the path to the file
// is derived from the CARGO_MANIFEST_DIR environment variable
let mut current_dir = std::env::current_dir().expect("Unable to get current directory");
Expand All @@ -184,7 +184,7 @@ fn generate_include(name: &Ident, path: &str) -> TokenStream {
fn generate_enum(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream {
let rules = rules
.iter()
.map(|rule| Ident::new(rule.name.as_str(), Span::call_site()));
.map(|rule| format_ident!("r#{}", rule.name));
if uses_eoi {
quote! {
#[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)]
Expand All @@ -209,7 +209,7 @@ fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream {
let mut rules: Vec<TokenStream> = rules
.iter()
.map(|rule| {
let rule = Ident::new(rule.name.as_str(), Span::call_site());
let rule = format_ident!("r#{}", rule.name);
quote! {
Rule::#rule => rules::#rule(state)
}
Expand All @@ -228,7 +228,8 @@ fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream {
}

fn generate_rule(rule: OptimizedRule) -> TokenStream {
let name = Ident::new(&rule.name, Span::call_site());
let name = format_ident!("r#{}", rule.name);

let expr = if rule.ty == RuleType::Atomic || rule.ty == RuleType::CompoundAtomic {
generate_expr_atomic(rule.expr)
} else if name == "WHITESPACE" || name == "COMMENT" {
Expand Down Expand Up @@ -364,7 +365,7 @@ fn generate_expr(expr: OptimizedExpr) -> TokenStream {
}
}
OptimizedExpr::Ident(ident) => {
let ident = Ident::new(&ident, Span::call_site());
let ident = format_ident!("r#{}", ident);
quote! { self::#ident(state) }
}
OptimizedExpr::PeekSlice(start, end_) => {
Expand Down Expand Up @@ -510,7 +511,7 @@ fn generate_expr_atomic(expr: OptimizedExpr) -> TokenStream {
}
}
OptimizedExpr::Ident(ident) => {
let ident = Ident::new(&ident, Span::call_site());
let ident = format_ident!("r#{}", ident);
quote! { self::#ident(state) }
}
OptimizedExpr::PeekSlice(start, end_) => {
Expand Down Expand Up @@ -675,7 +676,7 @@ mod tests {
#[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Rule {
f
r#f
}
}
.to_string()
Expand Down Expand Up @@ -863,7 +864,7 @@ mod tests {
assert_eq!(
generate_expr(expr).to_string(),
quote! {
self::a(state).or_else(|state| {
self::r#a(state).or_else(|state| {
state.sequence(|state| {
state.match_range('a'..'b').and_then(|state| {
super::hidden::skip(state)
Expand Down Expand Up @@ -929,7 +930,7 @@ mod tests {
assert_eq!(
generate_expr_atomic(expr).to_string(),
quote! {
self::a(state).or_else(|state| {
self::r#a(state).or_else(|state| {
state.sequence(|state| {
state.match_range('a'..'b').and_then(|state| {
state.lookahead(false, |state| {
Expand Down Expand Up @@ -960,11 +961,17 @@ mod tests {
fn generate_complete() {
let name = Ident::new("MyParser", Span::call_site());
let generics = Generics::default();

let rules = vec![OptimizedRule {
name: "a".to_owned(),
ty: RuleType::Silent,
expr: OptimizedExpr::Str("b".to_owned()),
}, OptimizedRule {
name: "if".to_owned(),
ty: RuleType::Silent,
expr: OptimizedExpr::Ident("a".to_owned())
}];

let defaults = vec!["ANY"];
let result = result_type();
let box_ty = box_type();
Expand All @@ -980,7 +987,8 @@ mod tests {
#[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Rule {
a
r#a,
r#if
}

#[allow(clippy::all)]
Expand Down Expand Up @@ -1009,10 +1017,16 @@ mod tests {

#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn a(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
pub fn r#a(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
state.match_string("b")
}

#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn r#if(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
self::r#a(state)
}

#[inline]
#[allow(dead_code, non_snake_case, unused_variables)]
pub fn ANY(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
Expand All @@ -1025,7 +1039,8 @@ mod tests {

::pest::state(input, |state| {
match rule {
Rule::a => rules::a(state)
Rule::r#a => rules::r#a(state),
Rule::r#if => rules::r#if(state)
}
})
}
Expand Down
40 changes: 1 addition & 39 deletions meta/src/validator.rs
Expand Up @@ -71,7 +71,6 @@ static BUILTINS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
});

/// It checks the parsed grammar for common mistakes:
/// - using Rust keywords
/// - using Pest keywords
/// - duplicate rules
/// - undefined rules
Expand All @@ -84,6 +83,7 @@ pub fn validate_pairs(pairs: Pairs<'_, Rule>) -> Result<Vec<&str>, Vec<Error<Rul
.filter(|pair| pair.as_rule() == Rule::grammar_rule)
.map(|pair| pair.into_inner().next().unwrap().as_span())
.collect();

let called_rules: Vec<_> = pairs
.clone()
.filter(|pair| pair.as_rule() == Rule::grammar_rule)
Expand All @@ -98,7 +98,6 @@ pub fn validate_pairs(pairs: Pairs<'_, Rule>) -> Result<Vec<&str>, Vec<Error<Rul

let mut errors = vec![];

errors.extend(validate_rust_keywords(&definitions));
errors.extend(validate_pest_keywords(&definitions));
errors.extend(validate_already_defined(&definitions));
errors.extend(validate_undefined(&definitions, &called_rules));
Expand All @@ -115,27 +114,6 @@ pub fn validate_pairs(pairs: Pairs<'_, Rule>) -> Result<Vec<&str>, Vec<Error<Rul
Ok(defaults.cloned().collect())
}

/// Validates that the given `definitions` do not contain any Rust keywords.
#[allow(clippy::ptr_arg)]
pub fn validate_rust_keywords(definitions: &Vec<Span<'_>>) -> Vec<Error<Rule>> {
DvvCz marked this conversation as resolved.
Show resolved Hide resolved
let mut errors = vec![];

for definition in definitions {
let name = definition.as_str();

if RUST_KEYWORDS.contains(name) {
errors.push(Error::new_from_span(
ErrorVariant::CustomError {
message: format!("{} is a rust keyword", name),
},
*definition,
))
}
}

errors
}

/// Validates that the given `definitions` do not contain any Pest keywords.
#[allow(clippy::ptr_arg)]
pub fn validate_pest_keywords(definitions: &Vec<Span<'_>>) -> Vec<Error<Rule>> {
Expand Down Expand Up @@ -507,22 +485,6 @@ mod tests {
// Verifies that defining a pest rule named after the Rust keyword `let`
// is rejected: `validate_pairs` must report "let is a rust keyword" with
// the caret pointing at the rule name, which `unwrap_or_report` turns
// into the panic asserted by `should_panic`.
#[test]
#[should_panic(expected = "grammar error

--> 1:1
|
1 | let = { \"a\" }
| ^-^
|
= let is a rust keyword")]
fn rust_keyword() {
let input = "let = { \"a\" }";
unwrap_or_report(validate_pairs(
PestParser::parse(Rule::grammar_rules, input).unwrap(),
));
}

#[test]
#[should_panic(expected = "grammar error

--> 1:1
|
1 | ANY = { \"a\" }
Expand Down