From 635e0f6e7d87f250582f5dacd259ccbe9d2c80ea Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 3 Sep 2022 10:54:39 -0700 Subject: [PATCH] unicode: regenerate DFAs to make use of once_cell The latest version of ucd-generate now uses once_cell instead of lazy_static. So we re-generate the DFAs to bring in that change and drop the lazy_static dependency. Closes #124 --- Cargo.toml | 4 ++-- src/unicode/fsm/grapheme_break_fwd.rs | 24 +++++++++------------- src/unicode/fsm/grapheme_break_rev.rs | 24 +++++++++------------- src/unicode/fsm/regional_indicator_rev.rs | 24 +++++++++------------- src/unicode/fsm/sentence_break_fwd.rs | 24 +++++++++------------- src/unicode/fsm/simple_word_fwd.rs | 24 +++++++++------------- src/unicode/fsm/whitespace_anchored_fwd.rs | 24 +++++++++------------- src/unicode/fsm/whitespace_anchored_rev.rs | 24 +++++++++------------- src/unicode/fsm/word_break_fwd.rs | 24 +++++++++------------- 9 files changed, 82 insertions(+), 114 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 727cfa9..ac4eb9e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,12 +25,12 @@ bench = false default = ["std", "unicode"] std = ["alloc", "memchr/std", "serde?/std"] alloc = ["serde?/alloc"] -unicode = ["dep:lazy_static", "dep:regex-automata"] +unicode = ["dep:once_cell", "dep:regex-automata"] serde = ["dep:serde"] [dependencies] memchr = { version = "2.4.0", default-features = false } -lazy_static = { version = "1.2.0", optional = true } +once_cell = { version = "1.14.0", optional = true } regex-automata = { version = "0.1.5", default-features = false, optional = true } serde = { version = "1.0.85", default-features = false, optional = true } diff --git a/src/unicode/fsm/grapheme_break_fwd.rs b/src/unicode/fsm/grapheme_break_fwd.rs index 4a43ecf..dea4a7e 100644 --- a/src/unicode/fsm/grapheme_break_fwd.rs +++ b/src/unicode/fsm/grapheme_break_fwd.rs @@ -5,8 +5,9 @@ // ucd-generate 0.2.12 is available on crates.io. #[cfg(target_endian = "big")] -lazy_static::lazy_static! { - pub static ref GRAPHEME_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = { +pub static GRAPHEME_BREAK_FWD: ::once_cell::sync::Lazy< + ::regex_automata::SparseDFA<&'static [u8], u16>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u8; 0], @@ -18,15 +19,13 @@ lazy_static::lazy_static! { bytes: *include_bytes!("grapheme_break_fwd.bigendian.dfa"), }; - unsafe { - ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +}); #[cfg(target_endian = "little")] -lazy_static::lazy_static! { - pub static ref GRAPHEME_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = { +pub static GRAPHEME_BREAK_FWD: ::once_cell::sync::Lazy< + ::regex_automata::SparseDFA<&'static [u8], u16>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u8; 0], @@ -38,8 +37,5 @@ lazy_static::lazy_static! { bytes: *include_bytes!("grapheme_break_fwd.littleendian.dfa"), }; - unsafe { - ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +}); diff --git a/src/unicode/fsm/grapheme_break_rev.rs b/src/unicode/fsm/grapheme_break_rev.rs index 423a3c6..2d2cd54 100644 --- a/src/unicode/fsm/grapheme_break_rev.rs +++ b/src/unicode/fsm/grapheme_break_rev.rs @@ -5,8 +5,9 @@ // ucd-generate 0.2.12 is available on crates.io. #[cfg(target_endian = "big")] -lazy_static::lazy_static! { - pub static ref GRAPHEME_BREAK_REV: ::regex_automata::SparseDFA<&'static [u8], u16> = { +pub static GRAPHEME_BREAK_REV: ::once_cell::sync::Lazy< + ::regex_automata::SparseDFA<&'static [u8], u16>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u8; 0], @@ -18,15 +19,13 @@ lazy_static::lazy_static! { bytes: *include_bytes!("grapheme_break_rev.bigendian.dfa"), }; - unsafe { - ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +}); #[cfg(target_endian = "little")] -lazy_static::lazy_static! { - pub static ref GRAPHEME_BREAK_REV: ::regex_automata::SparseDFA<&'static [u8], u16> = { +pub static GRAPHEME_BREAK_REV: ::once_cell::sync::Lazy< + ::regex_automata::SparseDFA<&'static [u8], u16>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u8; 0], @@ -38,8 +37,5 @@ lazy_static::lazy_static! { bytes: *include_bytes!("grapheme_break_rev.littleendian.dfa"), }; - unsafe { - ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +}); diff --git a/src/unicode/fsm/regional_indicator_rev.rs b/src/unicode/fsm/regional_indicator_rev.rs index 7f187ec..db7a40f 100644 --- a/src/unicode/fsm/regional_indicator_rev.rs +++ b/src/unicode/fsm/regional_indicator_rev.rs @@ -5,8 +5,9 @@ // ucd-generate 0.2.12 is available on crates.io. #[cfg(target_endian = "big")] -lazy_static::lazy_static! { - pub static ref REGIONAL_INDICATOR_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = { +pub static REGIONAL_INDICATOR_REV: ::once_cell::sync::Lazy< + ::regex_automata::DenseDFA<&'static [u8], u8>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u8; 0], @@ -18,15 +19,13 @@ lazy_static::lazy_static! { bytes: *include_bytes!("regional_indicator_rev.bigendian.dfa"), }; - unsafe { - ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } +}); #[cfg(target_endian = "little")] -lazy_static::lazy_static! { - pub static ref REGIONAL_INDICATOR_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = { +pub static REGIONAL_INDICATOR_REV: ::once_cell::sync::Lazy< + ::regex_automata::DenseDFA<&'static [u8], u8>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u8; 0], @@ -38,8 +37,5 @@ lazy_static::lazy_static! { bytes: *include_bytes!("regional_indicator_rev.littleendian.dfa"), }; - unsafe { - ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } +}); diff --git a/src/unicode/fsm/sentence_break_fwd.rs b/src/unicode/fsm/sentence_break_fwd.rs index 177a00b..97dd658 100644 --- a/src/unicode/fsm/sentence_break_fwd.rs +++ b/src/unicode/fsm/sentence_break_fwd.rs @@ -5,8 +5,9 @@ // ucd-generate 0.2.12 is available on crates.io. #[cfg(target_endian = "big")] -lazy_static::lazy_static! { - pub static ref SENTENCE_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = { +pub static SENTENCE_BREAK_FWD: ::once_cell::sync::Lazy< + ::regex_automata::SparseDFA<&'static [u8], u32>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u8; 0], @@ -18,15 +19,13 @@ lazy_static::lazy_static! { bytes: *include_bytes!("sentence_break_fwd.bigendian.dfa"), }; - unsafe { - ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +}); #[cfg(target_endian = "little")] -lazy_static::lazy_static! { - pub static ref SENTENCE_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = { +pub static SENTENCE_BREAK_FWD: ::once_cell::sync::Lazy< + ::regex_automata::SparseDFA<&'static [u8], u32>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u8; 0], @@ -38,8 +37,5 @@ lazy_static::lazy_static! { bytes: *include_bytes!("sentence_break_fwd.littleendian.dfa"), }; - unsafe { - ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +}); diff --git a/src/unicode/fsm/simple_word_fwd.rs b/src/unicode/fsm/simple_word_fwd.rs index 0d4a17e..32b69b6 100644 --- a/src/unicode/fsm/simple_word_fwd.rs +++ b/src/unicode/fsm/simple_word_fwd.rs @@ -5,8 +5,9 @@ // ucd-generate 0.2.12 is available on crates.io. #[cfg(target_endian = "big")] -lazy_static::lazy_static! { - pub static ref SIMPLE_WORD_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = { +pub static SIMPLE_WORD_FWD: ::once_cell::sync::Lazy< + ::regex_automata::SparseDFA<&'static [u8], u16>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u8; 0], @@ -18,15 +19,13 @@ lazy_static::lazy_static! { bytes: *include_bytes!("simple_word_fwd.bigendian.dfa"), }; - unsafe { - ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +}); #[cfg(target_endian = "little")] -lazy_static::lazy_static! { - pub static ref SIMPLE_WORD_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = { +pub static SIMPLE_WORD_FWD: ::once_cell::sync::Lazy< + ::regex_automata::SparseDFA<&'static [u8], u16>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u8; 0], @@ -38,8 +37,5 @@ lazy_static::lazy_static! { bytes: *include_bytes!("simple_word_fwd.littleendian.dfa"), }; - unsafe { - ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +}); diff --git a/src/unicode/fsm/whitespace_anchored_fwd.rs b/src/unicode/fsm/whitespace_anchored_fwd.rs index 9b71474..0780412 100644 --- a/src/unicode/fsm/whitespace_anchored_fwd.rs +++ b/src/unicode/fsm/whitespace_anchored_fwd.rs @@ -5,8 +5,9 @@ // ucd-generate 0.2.12 is available on crates.io. #[cfg(target_endian = "big")] -lazy_static::lazy_static! { - pub static ref WHITESPACE_ANCHORED_FWD: ::regex_automata::DenseDFA<&'static [u8], u8> = { +pub static WHITESPACE_ANCHORED_FWD: ::once_cell::sync::Lazy< + ::regex_automata::DenseDFA<&'static [u8], u8>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u8; 0], @@ -18,15 +19,13 @@ lazy_static::lazy_static! { bytes: *include_bytes!("whitespace_anchored_fwd.bigendian.dfa"), }; - unsafe { - ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } +}); #[cfg(target_endian = "little")] -lazy_static::lazy_static! { - pub static ref WHITESPACE_ANCHORED_FWD: ::regex_automata::DenseDFA<&'static [u8], u8> = { +pub static WHITESPACE_ANCHORED_FWD: ::once_cell::sync::Lazy< + ::regex_automata::DenseDFA<&'static [u8], u8>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u8; 0], @@ -38,8 +37,5 @@ lazy_static::lazy_static! { bytes: *include_bytes!("whitespace_anchored_fwd.littleendian.dfa"), }; - unsafe { - ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } +}); diff --git a/src/unicode/fsm/whitespace_anchored_rev.rs b/src/unicode/fsm/whitespace_anchored_rev.rs index 79a0071..3d0d7a6 100644 --- a/src/unicode/fsm/whitespace_anchored_rev.rs +++ b/src/unicode/fsm/whitespace_anchored_rev.rs @@ -5,8 +5,9 @@ // ucd-generate 0.2.12 is available on crates.io. #[cfg(target_endian = "big")] -lazy_static::lazy_static! { - pub static ref WHITESPACE_ANCHORED_REV: ::regex_automata::DenseDFA<&'static [u16], u16> = { +pub static WHITESPACE_ANCHORED_REV: ::once_cell::sync::Lazy< + ::regex_automata::DenseDFA<&'static [u16], u16>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u16; 0], @@ -18,15 +19,13 @@ lazy_static::lazy_static! { bytes: *include_bytes!("whitespace_anchored_rev.bigendian.dfa"), }; - unsafe { - ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } +}); #[cfg(target_endian = "little")] -lazy_static::lazy_static! { - pub static ref WHITESPACE_ANCHORED_REV: ::regex_automata::DenseDFA<&'static [u16], u16> = { +pub static WHITESPACE_ANCHORED_REV: ::once_cell::sync::Lazy< + ::regex_automata::DenseDFA<&'static [u16], u16>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u16; 0], @@ -38,8 +37,5 @@ lazy_static::lazy_static! { bytes: *include_bytes!("whitespace_anchored_rev.littleendian.dfa"), }; - unsafe { - ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } +}); diff --git a/src/unicode/fsm/word_break_fwd.rs b/src/unicode/fsm/word_break_fwd.rs index 3637f0f..dcb5f6b 100644 --- a/src/unicode/fsm/word_break_fwd.rs +++ b/src/unicode/fsm/word_break_fwd.rs @@ -5,8 +5,9 @@ // ucd-generate 0.2.12 is available on crates.io. #[cfg(target_endian = "big")] -lazy_static::lazy_static! { - pub static ref WORD_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = { +pub static WORD_BREAK_FWD: ::once_cell::sync::Lazy< + ::regex_automata::SparseDFA<&'static [u8], u32>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u8; 0], @@ -18,15 +19,13 @@ lazy_static::lazy_static! { bytes: *include_bytes!("word_break_fwd.bigendian.dfa"), }; - unsafe { - ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +}); #[cfg(target_endian = "little")] -lazy_static::lazy_static! { - pub static ref WORD_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = { +pub static WORD_BREAK_FWD: ::once_cell::sync::Lazy< + ::regex_automata::SparseDFA<&'static [u8], u32>, +> = ::once_cell::sync::Lazy::new(|| { #[repr(C)] struct Aligned { _align: [u8; 0], @@ -38,8 +37,5 @@ lazy_static::lazy_static! { bytes: *include_bytes!("word_break_fwd.littleendian.dfa"), }; - unsafe { - ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) - } - }; -} + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +});