forked from tafia/quick-xml
/
custom_entities.rs
73 lines (65 loc) · 2.51 KB
/
custom_entities.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
//! This example demonstrates how custom entities can be extracted from the DOCTYPE,
//! and later used to decode text and attribute values.
//!
//! NB: this example is deliberately kept simple:
//! * it assumes that the XML file is UTF-8 encoded (custom_entities must only contain UTF-8 data)
//! * it only handles internal entities;
//! * the regex in this example is simple but brittle;
//! * it does not support the use of entities inside entity declarations.
use std::collections::HashMap;
use quick_xml::events::Event;
use quick_xml::reader::Reader;
use regex::bytes::Regex;
/// Sample XML document used as the reader's input.
///
/// Its DOCTYPE internal subset declares a single entity, `msg`, which is then
/// referenced both in the `label` attribute and in the text content of the
/// `<test>` element.
const DATA: &str = r#"
<?xml version="1.0"?>
<!DOCTYPE test [
<!ENTITY msg "hello world" >
]>
<test label="&msg;">&msg;</test>
"#;
/// Reads `DATA`, collects the entities declared in its DOCTYPE, and prints the
/// attribute values and text content of `<test>` with those entities expanded.
///
/// # Errors
///
/// Returns an error if the entity regex fails to compile, if the reader reports
/// a parse error (the message includes the reader's buffer position), or if
/// decoding or unescaping an entity, attribute, or text value fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut reader = Reader::from_str(DATA);
    reader.trim_text(true);

    // Entity name -> replacement text, filled in when the DOCTYPE event is seen.
    let mut custom_entities: HashMap<String, String> = HashMap::new();
    let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;

    loop {
        match reader.read_event() {
            Ok(Event::DocType(ref e)) => {
                // Capture group 1 is the entity name, group 2 its quoted value.
                for cap in entity_re.captures_iter(e) {
                    custom_entities.insert(
                        reader.decoder().decode(&cap[1])?.into_owned(),
                        reader.decoder().decode(&cap[2])?.into_owned(),
                    );
                }
            }
            Ok(Event::Start(ref e)) => match e.name().as_ref() {
                b"test" => {
                    // Collect fallibly so that a malformed attribute or an
                    // unresolvable entity is reported to the caller instead of
                    // aborting the process with a panic.
                    let attributes = e
                        .attributes()
                        .map(|a| -> Result<String, Box<dyn std::error::Error>> {
                            let value = a?.decode_and_unescape_value_with(&reader, |ent| {
                                custom_entities.get(ent).map(|s| s.as_str())
                            })?;
                            Ok(value.into_owned())
                        })
                        .collect::<Result<Vec<_>, _>>()?;
                    println!("attributes values: {:?}", attributes);
                }
                _ => (),
            },
            Ok(Event::Text(ref e)) => {
                let text = e.unescape_with(|ent| custom_entities.get(ent).map(|s| s.as_str()))?;
                println!("text value: {}", text);
            }
            Ok(Event::Eof) => break,
            // Keep the position in the message, but hand the failure back
            // through the `Result` that `main` already returns.
            Err(e) => {
                return Err(format!(
                    "Error at position {}: {:?}",
                    reader.buffer_position(),
                    e
                )
                .into());
            }
            _ => (),
        }
    }
    Ok(())
}