Skip to content

Commit

Permalink
Rewrite pest_meta::UNICODE_PROPERTY_NAMES to pest::unicode::unicode_p…
Browse files Browse the repository at this point in the history
…roperty_names.

 This will generate the property names by using a macro.
  • Loading branch information
huacnlee committed Dec 22, 2022
1 parent 524bfd7 commit a2938fc
Show file tree
Hide file tree
Showing 4 changed files with 7,032 additions and 326 deletions.
4 changes: 2 additions & 2 deletions generator/src/generator.rs
Expand Up @@ -13,9 +13,9 @@ use proc_macro2::TokenStream;
use quote::{ToTokens, TokenStreamExt};
use syn::{self, Generics, Ident};

use pest::unicode::unicode_property_names;
use pest_meta::ast::*;
use pest_meta::optimizer::*;
use pest_meta::UNICODE_PROPERTY_NAMES;

pub fn generate(
name: Ident,
Expand Down Expand Up @@ -153,7 +153,7 @@ fn generate_builtin_rules() -> Vec<(&'static str, TokenStream)> {

let box_ty = box_type();

for property in UNICODE_PROPERTY_NAMES {
for property in unicode_property_names() {
let property_ident: Ident = syn::parse_str(property).unwrap();
// insert manually for #property substitution
builtins.push((property, quote! {
Expand Down
4 changes: 2 additions & 2 deletions meta/src/validator.rs
Expand Up @@ -15,10 +15,10 @@ use std::collections::{HashMap, HashSet};

use pest::error::{Error, ErrorVariant, InputLocation};
use pest::iterators::Pairs;
use pest::unicode::unicode_property_names;
use pest::Span;

use crate::parser::{ParserExpr, ParserNode, ParserRule, Rule};
use crate::UNICODE_PROPERTY_NAMES;

static RUST_KEYWORDS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
[
Expand Down Expand Up @@ -66,7 +66,7 @@ static BUILTINS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
]
.iter()
.cloned()
.chain(UNICODE_PROPERTY_NAMES.iter().cloned())
.chain(unicode_property_names())
.collect::<HashSet<&str>>()
});

Expand Down
33 changes: 19 additions & 14 deletions pest/src/unicode/mod.rs
Expand Up @@ -10,21 +10,26 @@ use alloc::boxed::Box;
macro_rules! char_property_functions {
{$(
mod $module:ident;
[$(
static $property_names:ident = [$(
$prop:ident,
)*];
)*} => {$(
#[allow(unused)]
mod $module;
// ALPHABETIC('a')
$(pub fn $prop(c: char) -> bool {
self::$module::$prop.contains_char(c)
})*

pub static $property_names: &[&str] = &[
$(stringify!($prop),)*
];
)*};
}

char_property_functions! {
mod binary;
[
static BINARY_PROPERTY_NAMES = [
// ASCII_HEX_DIGIT, // let this one be stripped out -- the full trie is wasteful for ASCII
ALPHABETIC, BIDI_CONTROL, CASE_IGNORABLE, CASED, CHANGES_WHEN_CASEFOLDED,
CHANGES_WHEN_CASEMAPPED, CHANGES_WHEN_LOWERCASED, CHANGES_WHEN_TITLECASED,
Expand All @@ -40,7 +45,7 @@ char_property_functions! {
];

mod category;
[
static CATEGORY_PROPERTY_NAMES = [
CASED_LETTER, CLOSE_PUNCTUATION, CONNECTOR_PUNCTUATION, CONTROL, CURRENCY_SYMBOL,
DASH_PUNCTUATION, DECIMAL_NUMBER, ENCLOSING_MARK, FINAL_PUNCTUATION, FORMAT,
INITIAL_PUNCTUATION, LETTER, LETTER_NUMBER, LINE_SEPARATOR, LOWERCASE_LETTER, MARK,
Expand All @@ -51,11 +56,20 @@ char_property_functions! {
];

mod script;
[
HAN, KATAKANA, HIRAGANA, HANGUL,
static SCRIPT_PROPERTY_NAMES = [
];
}

/// Returns an iterator over the names of all Unicode properties listed in
/// this module's generated name tables.
///
/// Names are yielded in a fixed order: every entry of `BINARY_PROPERTY_NAMES`,
/// then every entry of `CATEGORY_PROPERTY_NAMES`, then every entry of
/// `SCRIPT_PROPERTY_NAMES`. Each item is the `&'static str` stored in the
/// table; the tables themselves are not copied.
pub fn unicode_property_names() -> Box<dyn Iterator<Item = &'static str>> {
    Box::new(
        BINARY_PROPERTY_NAMES
            .iter()
            .chain(CATEGORY_PROPERTY_NAMES.iter())
            .chain(SCRIPT_PROPERTY_NAMES.iter())
            // The tables hold `&'static str`, so iterating yields `&&str`;
            // a single `copied()` after chaining dereferences every item once.
            .copied(),
    )
}

pub fn by_name(name: &str) -> Option<Box<dyn Fn(char) -> bool>> {
for property in binary::BY_NAME {
if name == property.0.to_uppercase() {
Expand All @@ -75,14 +89,5 @@ pub fn by_name(name: &str) -> Option<Box<dyn Fn(char) -> bool>> {
}
}

if name == "CJK" {
return Some(Box::new(|c| {
script::HAN.contains_char(c)
|| script::HANGUL.contains_char(c)
|| script::KATAKANA.contains_char(c)
|| script::HIRAGANA.contains_char(c)
}));
}

None
}

0 comments on commit a2938fc

Please sign in to comment.