Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace ucd-generate dependency with ucd-parse #20

Merged
merged 1 commit into from Nov 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitattributes
@@ -1,5 +1,5 @@
generate/src/ucd.rs linguist-generated
src/tables.rs linguist-generated
tests/fst/xid_continue.fst linguist-generated
tests/fst/xid_start.fst linguist-generated
tests/tables/tables.rs linguist-generated
tests/trie/trie.rs linguist-generated
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Expand Up @@ -20,15 +20,15 @@ jobs:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@stable
- id: ucd-generate
run: echo "version=$(grep 'ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+' generate/src/ucd.rs --only-matching)" >> $GITHUB_OUTPUT
run: echo "version=$(grep 'ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+' tests/tables/tables.rs --only-matching)" >> $GITHUB_OUTPUT
- run: cargo install ucd-generate
- run: curl -LO https://www.unicode.org/Public/zipped/latest/UCD.zip
- run: unzip UCD.zip -d UCD
- run: ucd-generate property-bool UCD --include XID_Start,XID_Continue > generate/src/ucd.rs
- run: ucd-generate property-bool UCD --include XID_Start,XID_Continue > tests/tables/tables.rs
- run: ucd-generate property-bool UCD --include XID_Start,XID_Continue --fst-dir tests/fst
- run: ucd-generate property-bool UCD --include XID_Start,XID_Continue --trie-set > tests/trie/trie.rs
- run: cargo run --manifest-path generate/Cargo.toml
- run: sed --in-place 's/ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+/${{steps.ucd-generate.outputs.version}}/' generate/src/ucd.rs tests/trie/trie.rs
- run: sed --in-place 's/ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+/${{steps.ucd-generate.outputs.version}}/' tests/tables/tables.rs tests/trie/trie.rs
- run: git diff --exit-code

test:
Expand Down
4 changes: 4 additions & 0 deletions generate/Cargo.toml
Expand Up @@ -4,3 +4,7 @@ version = "0.0.0"
authors = ["David Tolnay <dtolnay@gmail.com>"]
edition = "2018"
publish = false

[dependencies]
anyhow = "1"
ucd-parse = "0.1.10"
64 changes: 29 additions & 35 deletions generate/src/main.rs
Expand Up @@ -3,7 +3,7 @@
// $ cargo install ucd-generate
// $ curl -LO https://www.unicode.org/Public/zipped/15.0.0/UCD.zip
// $ unzip UCD.zip -d UCD
// $ ucd-generate property-bool UCD --include XID_Start,XID_Continue > generate/src/ucd.rs
// $ ucd-generate property-bool UCD --include XID_Start,XID_Continue > tests/tables/tables.rs
// $ ucd-generate property-bool UCD --include XID_Start,XID_Continue --fst-dir tests/fst
// $ ucd-generate property-bool UCD --include XID_Start,XID_Continue --trie-set > tests/trie/trie.rs
// $ cargo run --manifest-path generate/Cargo.toml
Expand All @@ -12,50 +12,43 @@
clippy::cast_lossless,
clippy::cast_possible_truncation, // https://github.com/rust-lang/rust-clippy/issues/9613
clippy::match_wild_err_arm,
clippy::module_name_repetitions,
clippy::too_many_lines,
clippy::uninlined_format_args
)]

#[rustfmt::skip]
#[allow(dead_code, clippy::all, clippy::pedantic)]
mod ucd;

mod output;
mod parse;
mod write;

use std::cmp::Ordering;
use crate::parse::parse_xid_properties;
use anyhow::Result;
use std::collections::{BTreeMap as Map, VecDeque};
use std::convert::TryFrom;
use std::fs;
use std::io;
use std::io::{self, Write};
use std::path::Path;
use std::process;

const CHUNK: usize = 64;
const PATH: &str = "../src/tables.rs";

fn is_xid_start(ch: char) -> bool {
search(ch, ucd::XID_START)
}

fn is_xid_continue(ch: char) -> bool {
search(ch, ucd::XID_CONTINUE)
}
const UCD: &str = "UCD";
const TABLES: &str = "src/tables.rs";

fn main() -> Result<()> {
let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
let unicode_ident_dir = manifest_dir.parent().unwrap();
let ucd_dir = unicode_ident_dir.join(UCD);
if !ucd_dir.exists() {
writeln!(
io::stderr(),
"Not found: {}\nDownload from https://www.unicode.org/Public/zipped/15.0.0/UCD.zip and unzip.",
ucd_dir.display(),
)?;
process::exit(1);
}

fn search(ch: char, table: &[(u32, u32)]) -> bool {
table
.binary_search_by(|&(lo, hi)| {
if lo > ch as u32 {
Ordering::Greater
} else if hi < ch as u32 {
Ordering::Less
} else {
Ordering::Equal
}
})
.is_ok()
}
let properties = parse_xid_properties(&ucd_dir)?;

fn main() -> io::Result<()> {
let mut chunkmap = Map::<[u8; CHUNK], u8>::new();
let mut dense = Vec::<[u8; CHUNK]>::new();
let mut new_chunk = |chunk| {
Expand Down Expand Up @@ -87,8 +80,8 @@ fn main() -> io::Result<()> {
let code = (i * CHUNK as u32 + j) * 8 + k;
if code >= 0x80 {
if let Some(ch) = char::from_u32(code) {
*this_start |= (is_xid_start(ch) as u8) << k;
*this_continue |= (is_xid_continue(ch) as u8) << k;
*this_start |= (properties.is_xid_start(ch) as u8) << k;
*this_continue |= (properties.is_xid_continue(ch) as u8) << k;
}
}
}
Expand Down Expand Up @@ -163,7 +156,8 @@ fn main() -> io::Result<()> {
*index = dense_to_halfdense[index];
}

let out = write::output(&index_start, &index_continue, &halfdense);
let path = Path::new(env!("CARGO_MANIFEST_DIR")).join(PATH);
fs::write(path, out)
let out = write::output(&properties, &index_start, &index_continue, &halfdense);
let path = unicode_ident_dir.join(TABLES);
fs::write(path, out)?;
Ok(())
}
40 changes: 40 additions & 0 deletions generate/src/parse.rs
@@ -0,0 +1,40 @@
use anyhow::Result;
use std::collections::BTreeSet as Set;
use std::path::Path;
use ucd_parse::CoreProperty;

/// Code point sets for the two identifier properties consumed by the generator.
///
/// Both sets store code points as raw `u32` values and are filled in by
/// `parse_xid_properties`.
pub struct Properties {
    /// Code points that carry the `XID_Start` property.
    xid_start: Set<u32>,
    /// Code points that carry the `XID_Continue` property.
    xid_continue: Set<u32>,
}

impl Properties {
    /// Returns `true` when `ch` is a member of the `XID_Start` set.
    pub fn is_xid_start(&self, ch: char) -> bool {
        let code = u32::from(ch);
        self.xid_start.contains(&code)
    }

    /// Returns `true` when `ch` is a member of the `XID_Continue` set.
    pub fn is_xid_continue(&self, ch: char) -> bool {
        let code = u32::from(ch);
        self.xid_continue.contains(&code)
    }
}

/// Builds the `XID_Start` and `XID_Continue` code point sets from the Unicode
/// Character Database located at `ucd_dir`.
///
/// Every `CoreProperty` record returned by `ucd_parse::parse` is inspected;
/// records for any other property name are skipped.
///
/// # Errors
///
/// Returns the error from `ucd_parse::parse` if the database cannot be parsed.
pub fn parse_xid_properties(ucd_dir: &Path) -> Result<Properties> {
    let mut xid_start = Set::new();
    let mut xid_continue = Set::new();

    let records: Vec<CoreProperty> = ucd_parse::parse(ucd_dir)?;
    for record in records {
        // Pick the destination set by property name; everything else is ignored.
        let target = match record.property.as_str() {
            "XID_Start" => &mut xid_start,
            "XID_Continue" => &mut xid_continue,
            _ => continue,
        };
        target.extend(
            record
                .codepoints
                .into_iter()
                .map(|codepoint| codepoint.value()),
        );
    }

    Ok(Properties {
        xid_start,
        xid_continue,
    })
}
16 changes: 12 additions & 4 deletions generate/src/write.rs
@@ -1,5 +1,6 @@
use crate::output::Output;
use crate::{is_xid_continue, is_xid_start, CHUNK};
use crate::parse::Properties;
use crate::CHUNK;

const HEAD: &str = "\
// \x40generated by ../generate. To regenerate, run the following in the repo root:
Expand All @@ -17,7 +18,12 @@ pub(crate) struct Align8<T>(pub(crate) T);
pub(crate) struct Align64<T>(pub(crate) T);
";

pub fn output(index_start: &[u8], index_continue: &[u8], halfdense: &[u8]) -> Output {
pub fn output(
properties: &Properties,
index_start: &[u8],
index_continue: &[u8],
halfdense: &[u8],
) -> Output {
let mut out = Output::new();
writeln!(out, "{}", HEAD);

Expand All @@ -29,7 +35,8 @@ pub fn output(index_start: &[u8], index_continue: &[u8], halfdense: &[u8]) -> Ou
write!(out, " ");
for j in 0..32 {
let ch = (i * 32 + j) as char;
write!(out, " {},", if is_xid_start(ch) { 'T' } else { 'F' });
let is_xid_start = properties.is_xid_start(ch);
write!(out, " {},", if is_xid_start { 'T' } else { 'F' });
}
writeln!(out);
}
Expand All @@ -44,7 +51,8 @@ pub fn output(index_start: &[u8], index_continue: &[u8], halfdense: &[u8]) -> Ou
write!(out, " ");
for j in 0..32 {
let ch = (i * 32 + j) as char;
write!(out, " {},", if is_xid_continue(ch) { 'T' } else { 'F' });
let is_xid_continue = properties.is_xid_continue(ch);
write!(out, " {},", if is_xid_continue { 'T' } else { 'F' });
}
writeln!(out);
}
Expand Down
9 changes: 4 additions & 5 deletions tests/static_size.rs
Expand Up @@ -19,14 +19,13 @@ fn test_size() {
#[test]
fn test_xid_size() {
#[deny(dead_code)]
#[allow(clippy::redundant_static_lifetimes)]
#[path = "../generate/src/ucd.rs"]
mod ucd;
#[path = "tables/mod.rs"]
mod tables;

let size = size_of_val(ucd::XID_START) + size_of_val(ucd::XID_CONTINUE);
let size = size_of_val(tables::XID_START) + size_of_val(tables::XID_CONTINUE);
assert_eq!(11528, size);

let _ = ucd::BY_NAME;
let _ = tables::BY_NAME;
}

#[cfg(target_pointer_width = "64")]
Expand Down
7 changes: 7 additions & 0 deletions tests/tables/mod.rs
@@ -0,0 +1,7 @@
// This file is `tables/mod.rs` and declares a child module that is also named
// `tables`, which is exactly what clippy's `module_inception` lint flags.
#![allow(clippy::module_inception)]

// The inner `tables` module is excluded from rustfmt and from the
// `redundant_static_lifetimes` lint.
#[allow(clippy::redundant_static_lifetimes)]
#[rustfmt::skip]
mod tables;

// Re-export everything from the inner module so users can refer to items as
// `tables::NAME` without the doubled `tables::tables::` path.
pub(crate) use self::tables::*;
File renamed without changes.