Skip to content

Commit

Permalink
Remove BOM coding from svg files
Browse files Browse the repository at this point in the history
  • Loading branch information
qarmin committed Aug 20, 2022
1 parent 014f9ec commit 1fa8410
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 0 deletions.
20 changes: 20 additions & 0 deletions src/matchers/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ pub fn is_html(buf: &[u8]) -> bool {
/// Returns `true` when `buf` begins with an XML declaration (`<?xml`).
///
/// Leading whitespace is discarded first, then any leading byte order
/// marks; the remaining bytes are compared against the `<?xml` prefix
/// without regard to ASCII case.
pub fn is_xml(buf: &[u8]) -> bool {
    const XML_DECLARATION: &[u8] = b"<?xml";
    let stripped = trim_start_byte_order_marks(trim_start_whitespaces(buf));
    starts_with_ignore_ascii_case(stripped, XML_DECLARATION)
}

Expand All @@ -61,6 +62,25 @@ fn trim_start_whitespaces(mut buf: &[u8]) -> &[u8] {
buf
}

/// Strip byte order marks (BOMs) from the beginning of the buffer.
///
/// Recognises the UTF-8 BOM (`EF BB BF`) and both UTF-16 BOMs
/// (`FE FF` big-endian, `FF FE` little-endian). Consecutive BOMs are all
/// removed. The returned slice borrows from `buf` and starts at the first
/// byte that is not part of a leading BOM; a buffer without a BOM (including
/// an empty or one-byte buffer) is returned unchanged.
fn trim_start_byte_order_marks(mut buf: &[u8]) -> &[u8] {
    const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
    const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];
    const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE];

    loop {
        // `starts_with` is length-safe, so short buffers (0, 1 or 2 bytes)
        // fall through to the `return` instead of spinning forever.
        let bom_len = if buf.starts_with(UTF8_BOM) {
            UTF8_BOM.len()
        } else if buf.starts_with(UTF16_BE_BOM) || buf.starts_with(UTF16_LE_BOM) {
            // Both UTF-16 marks are two bytes long.
            UTF16_BE_BOM.len()
        } else {
            return buf;
        };
        buf = &buf[bom_len..];
    }
}

/// Reports whether `buf` starts with `needle`, comparing bytes without
/// regard to ASCII case. A buffer shorter than `needle` never matches.
fn starts_with_ignore_ascii_case(buf: &[u8], needle: &[u8]) -> bool {
    // `get(..n)` yields `None` when the buffer is too short to hold the needle.
    match buf.get(..needle.len()) {
        Some(prefix) => prefix.eq_ignore_ascii_case(needle),
        None => false,
    }
}
Expand Down
6 changes: 6 additions & 0 deletions testdata/sample2.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<doc>
<assembly>
<name>System.Runtime</name>
</assembly>
</doc>
2 changes: 2 additions & 0 deletions tests/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ test_format!(Text, "text/html", "html", html, "sample.html");

test_format!(Text, "text/xml", "xml", xml, "sample.xml");

test_format!(Text, "text/xml", "xml", xml2, "sample2.xml");

test_format!(Text, "text/x-shellscript", "sh", sh, "sample.sh");

0 comments on commit 1fa8410

Please sign in to comment.