-
-
Notifications
You must be signed in to change notification settings - Fork 12
/
parse.rs
68 lines (58 loc) · 1.78 KB
/
parse.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
use anyhow::{bail, Result};
use std::collections::BTreeSet as Set;
use std::fs;
use std::path::Path;
/// Sets of code points carrying the `XID_Start` and `XID_Continue` properties.
///
/// Code points are stored as raw `u32` values. In this file the sets are
/// filled by `parse_xid_properties` from `DerivedCoreProperties.txt`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Properties {
    /// Code points recorded with the `XID_Start` property.
    xid_start: Set<u32>,
    /// Code points recorded with the `XID_Continue` property.
    xid_continue: Set<u32>,
}

impl Properties {
    /// Returns `true` if `ch` is a member of the `XID_Start` set.
    pub fn is_xid_start(&self, ch: char) -> bool {
        // `u32::from` is the lossless char -> code point conversion.
        self.xid_start.contains(&u32::from(ch))
    }

    /// Returns `true` if `ch` is a member of the `XID_Continue` set.
    pub fn is_xid_continue(&self, ch: char) -> bool {
        self.xid_continue.contains(&u32::from(ch))
    }
}
/// Reads `DerivedCoreProperties.txt` from `ucd_dir` and collects the code
/// points assigned to the `XID_Start` and `XID_Continue` properties.
///
/// Lines beginning with `#` and blank lines are skipped. Records naming any
/// other property are parsed but otherwise ignored.
///
/// # Errors
///
/// Returns an error if the file cannot be read, or if a data line is rejected
/// by `parse_line`. In the latter case the message contains the file name,
/// the 1-based line number and the offending line.
pub fn parse_xid_properties(ucd_dir: &Path) -> Result<Properties> {
    let mut properties = Properties {
        xid_start: Set::new(),
        xid_continue: Set::new(),
    };

    let filename = "DerivedCoreProperties.txt";
    let path = ucd_dir.join(filename);
    let contents = fs::read_to_string(path)?;

    for (i, line) in contents.lines().enumerate() {
        if line.starts_with('#') || line.trim().is_empty() {
            continue;
        }

        let (lo, hi, name) = match parse_line(line) {
            Some(line) => line,
            // `enumerate` counts from zero; report the 1-based line number
            // so it matches what editors and `grep -n` display.
            None => bail!("{} line {} is unexpected:\n{}", filename, i + 1, line),
        };

        // Only the two XID properties are of interest; skip everything else.
        let set = match name {
            "XID_Start" => &mut properties.xid_start,
            "XID_Continue" => &mut properties.xid_continue,
            _ => continue,
        };
        set.extend(lo..=hi);
    }

    Ok(properties)
}
/// Splits one data record of the form `<code>[..<code>] ; <name> [# comment]`
/// into `(lo, hi, name)`.
///
/// A single code point yields `lo == hi`. The returned name is the text
/// between the first `;` and the first `#` (if any), with surrounding
/// whitespace removed. Returns `None` when the line has no `;` or when a
/// code point fails to parse.
fn parse_line(line: &str) -> Option<(u32, u32, &str)> {
    let (range_field, remainder) = line.split_once(';')?;
    let range_field = range_field.trim();

    let (lo, hi) = match range_field.split_once("..") {
        Some((first, last)) => (parse_codepoint(first)?, parse_codepoint(last)?),
        None => {
            let single = parse_codepoint(range_field)?;
            (single, single)
        }
    };

    // Anything from the first '#' onwards is a trailing comment.
    let remainder = remainder.trim();
    let name = match remainder.find('#') {
        Some(comment_start) => &remainder[..comment_start],
        None => remainder,
    };

    Some((lo, hi, name.trim_end()))
}
/// Parses a hexadecimal Unicode code point such as `0041` or `1F600`.
///
/// Returns `None` when `s` is empty, contains anything other than ASCII hex
/// digits, or denotes a value above U+10FFFF. Surrogate code points are
/// accepted because Unicode data files list them.
fn parse_codepoint(s: &str) -> Option<u32> {
    /// Largest code point in the Unicode code space.
    const MAX_CODEPOINT: u32 = 0x10FFFF;

    // `u32::from_str_radix` tolerates a leading '+', which is not valid in a
    // code point field, so require pure hex digits up front.
    if !s.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }

    // Rejecting out-of-range values keeps a malformed range from making the
    // caller materialise billions of set entries.
    u32::from_str_radix(s, 16)
        .ok()
        .filter(|&cp| cp <= MAX_CODEPOINT)
}