Skip to content

Commit

Permalink
Merge pull request #530 from rhysd/utf8-bom
Browse files Browse the repository at this point in the history
Ignore UTF-8 BOM on syntax detection
  • Loading branch information
keith-hall committed Apr 23, 2024
2 parents 53413d5 + cb4a0f7 commit de715e5
Showing 1 changed file with 11 additions and 0 deletions.
11 changes: 11 additions & 0 deletions src/parsing/syntax_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ impl SyntaxSet {
/// This uses regexes that come with some sublime syntax grammars for matching things like
/// shebangs and mode lines like `-*- Mode: C -*-`
pub fn find_syntax_by_first_line<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference> {
let s = s.strip_prefix("\u{feff}").unwrap_or(s); // Strip UTF-8 BOM
let cache = self.first_line_cache();
for &(ref reg, i) in cache.regexes.iter().rev() {
if reg.search(s, 0, s.len(), None) {
Expand Down Expand Up @@ -1401,6 +1402,16 @@ mod tests {
assert_prototype_only_on(&["main"], &rebuilt, &rebuilt.syntaxes()[0]);
}

#[test]
fn find_syntax_set_from_line_with_bom() {
    // Regression test for #529: the first line below starts with U+FEFF
    // (the byte order mark) immediately followed by an XML declaration.
    // First-line detection is expected to still resolve to the "XML" syntax.
    let first_line = "\u{feff}<?xml version=\"1.0\"?>";

    let defaults = SyntaxSet::load_defaults_newlines();
    let detected = defaults.find_syntax_by_first_line(first_line).unwrap();

    assert_eq!(detected.name, "XML");
}

fn assert_ops_contain(ops: &[(usize, ScopeStackOp)], expected: &(usize, ScopeStackOp)) {
assert!(
ops.contains(expected),
Expand Down

0 comments on commit de715e5

Please sign in to comment.