Skip to content

Commit

Permalink
Merge pull request #165 from sharkdp/regex-syntax-error-checking
Browse files Browse the repository at this point in the history
Check regular expressions for compile errors
  • Loading branch information
trishume committed May 26, 2018
2 parents 6ad8132 + d8d1325 commit ab5f77d
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 24 deletions.
4 changes: 3 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ impl fmt::Display for LoadingError {
match *self {
WalkDir(ref error) => error.fmt(f),
Io(ref error) => error.fmt(f),
#[cfg(feature = "yaml-load")]
ParseSyntax(ref error) => error.fmt(f),
_ => write!(f, "{}", self.description()),
}
}
Expand All @@ -125,7 +127,7 @@ impl Error for LoadingError {
WalkDir(ref error) => error.description(),
Io(ref error) => error.description(),
#[cfg(feature = "yaml-load")]
ParseSyntax(_) => "Invalid syntax file",
ParseSyntax(ref error) => error.description(),
ParseTheme(_) => "Invalid syntax theme",
ReadSettings(_) => "Invalid syntax theme settings",
BadPath => "Invalid path",
Expand Down
48 changes: 28 additions & 20 deletions src/parsing/syntax_definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,31 +207,39 @@ impl ContextReference {
}
}

pub(crate) fn substitute_backrefs_in_regex<F>(regex_str: &str, substituter: F) -> String
where F: Fn(usize) -> Option<String>
{
let mut reg_str = String::with_capacity(regex_str.len());

let mut last_was_escape = false;
for c in regex_str.chars() {
if last_was_escape && c.is_digit(10) {
let val = c.to_digit(10).unwrap() as usize;
if let Some(sub) = substituter(val) {
reg_str.push_str(&sub);
}
} else if last_was_escape {
reg_str.push('\\');
reg_str.push(c);
} else if c != '\\' {
reg_str.push(c);
}

last_was_escape = c == '\\' && !last_was_escape;
}
reg_str
}

impl MatchPattern {

/// substitutes back-refs in Regex with regions from s
/// used for match patterns which refer to captures from the pattern
/// that pushed them.
pub fn regex_with_substitutes(&self, region: &Region, s: &str) -> String {
let mut reg_str = String::new();

let mut last_was_escape = false;
for c in self.regex_str.chars() {
if last_was_escape && c.is_digit(10) {
let val = c.to_digit(10).unwrap();
if let Some((start, end)) = region.pos(val as usize) {
let escaped = escape(&s[start..end]);
reg_str.push_str(&escaped);
}
} else if last_was_escape {
reg_str.push('\\');
reg_str.push(c);
} else if c != '\\' {
reg_str.push(c);
}

last_was_escape = c == '\\' && !last_was_escape;
}
reg_str
substitute_backrefs_in_regex(&self.regex_str, |i| {
region.pos(i).map(|(start, end)| escape(&s[start..end]))
})
}

/// Used by the parser to compile a regex which needs to reference
Expand Down
87 changes: 84 additions & 3 deletions src/parsing/yaml_load.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ use super::syntax_definition::*;
use yaml_rust::{YamlLoader, Yaml, ScanError};
use yaml_rust::yaml::Hash;
use std::collections::HashMap;
use onig::{self, Regex, Captures};
use onig::{self, Regex, Captures, RegexOptions, Syntax};
use std::rc::Rc;
use std::cell::RefCell;
use std::error::Error;
use std::fmt;
use std::path::Path;
use std::ops::DerefMut;

Expand All @@ -18,7 +20,7 @@ pub enum ParseSyntaxError {
/// Some keys are required for something to be a valid `.sublime-syntax`
MissingMandatoryKey(&'static str),
/// Invalid regex
RegexCompileError(onig::Error),
RegexCompileError(String, onig::Error),
/// A scope that syntect's scope implementation can't handle
InvalidScope(ParseScopeError),
/// A reference to another file that is invalid
Expand All @@ -31,6 +33,46 @@ pub enum ParseSyntaxError {
TypeMismatch,
}

impl fmt::Display for ParseSyntaxError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use ParseSyntaxError::*;

match *self {
RegexCompileError(ref regex, ref error) =>
write!(f, "Error while compiling regex '{}': {}",
regex, error.description()),
_ => write!(f, "{}", self.description())
}
}
}

impl Error for ParseSyntaxError {
fn description(&self) -> &str {
use ParseSyntaxError::*;

match *self {
InvalidYaml(_) => "Invalid YAML file syntax",
EmptyFile => "Empty file",
MissingMandatoryKey(_) => "Missing mandatory key in YAML file",
RegexCompileError(_, ref error) => error.description(),
InvalidScope(_) => "Invalid scope",
BadFileRef => "Invalid file reference",
MainMissing => "Context 'main' is missing",
TypeMismatch => "Type mismatch",
}
}

fn cause(&self) -> Option<&Error> {
use ParseSyntaxError::*;

match *self {
InvalidYaml(ref error) => Some(error),
RegexCompileError(_, ref error) => Some(error),
_ => None,
}
}
}

fn get_key<'a, R, F: FnOnce(&'a Yaml) -> Option<R>>(map: &'a Hash,
key: &'static str,
f: F)
Expand Down Expand Up @@ -247,6 +289,21 @@ impl SyntaxDefinition {
})
}

fn try_compile_regex(regex_str: &str) -> Result<(), ParseSyntaxError> {
// Replace backreferences with a placeholder value that will also appear in errors
let regex_str = substitute_backrefs_in_regex(regex_str, |i| Some(format!("<placeholder_{}>", i)));

let result = Regex::with_options(&regex_str,
RegexOptions::REGEX_OPTION_CAPTURE_GROUP,
Syntax::default());
match result {
Err(onig_error) => {
Err(ParseSyntaxError::RegexCompileError(regex_str, onig_error))
},
_ => Ok(())
}
}

fn parse_match_pattern(map: &Hash,
state: &mut ParserState)
-> Result<MatchPattern, ParseSyntaxError> {
Expand All @@ -260,6 +317,8 @@ impl SyntaxDefinition {
};
// println!("{:?}", regex_str);

Self::try_compile_regex(&regex_str)?;

let scope = get_key(map, "scope", |x| x.as_str())
.ok()
.map(|s| str_to_scopes(s, state.scope_repo))
Expand Down Expand Up @@ -420,6 +479,10 @@ fn rewrite_regex(regex: String) -> String {
return regex;
}

// A special fix to rewrite a pattern from the `Rd` syntax that the RegexRewriter can not
// handle properly.
let regex = regex.replace("(?:\\n)?", "(?:$|)");

let rewriter = RegexRewriter {
bytes: regex.as_bytes(),
index: 0,
Expand Down Expand Up @@ -704,6 +767,24 @@ mod tests {
}
}

#[test]
fn errors_on_regex_compile_error() {
let def = SyntaxDefinition::load_from_str(r#"
name: C
scope: source.c
file_extensions: [test]
contexts:
main:
- match: '[a'
scope: keyword.name
"#,false, None);
assert!(def.is_err());
match def.unwrap_err() {
ParseSyntaxError::RegexCompileError(ref regex, _) => assert_eq!("[a", regex),
_ => assert!(false, "Got unexpected ParseSyntaxError"),
}
}

#[test]
fn can_parse_ugly_yaml() {
let defn: SyntaxDefinition =
Expand Down Expand Up @@ -790,7 +871,7 @@ mod tests {
// In order to properly understand nesting, we'd have to have a full parser, so ignore it.
assert_eq!(&rewrite(r"[[a]&&[\n]]"), r"[[a]&&[\n]]");

assert_eq!(&rewrite(r"ab(?:\n)?"), r"ab(?:$)?");
assert_eq!(&rewrite(r"ab(?:\n)?"), r"ab(?:$|)");
assert_eq!(&rewrite(r"(?<!\n)ab"), r"(?<!$)ab");
assert_eq!(&rewrite(r"(?<=\n)ab"), r"(?<=$)ab");
}
Expand Down

0 comments on commit ab5f77d

Please sign in to comment.