trishume · keith-hall · Jul 2, 2018 · Jul 2, 2018 · Jul 3, 2018 · Jul 3, 2018
diff --git a/Cargo.toml b/Cargo.toml
@@ -37,7 +37,6 @@ serde_json = "1.0"
 [dev-dependencies]
 criterion = "0.3"
 rayon = "1.0.0"
-regex = "1.0"
 getopts = "0.2"
 pretty_assertions = "0.6"
 

diff --git a/examples/syntest.rs b/examples/syntest.rs
@@ -7,22 +7,20 @@
 // you can tell it where to parse them from - the following will execute only 1 syntax test after
 // parsing the sublime-syntax files in the JavaScript folder:
 // cargo run --example syntest testdata/Packages/JavaScript/syntax_test_json.json testdata/Packages/JavaScript/
-#[macro_use]
-extern crate lazy_static;
+extern crate syntect;
+extern crate walkdir;
+extern crate getopts;
 
-use syntect::parsing::{SyntaxSet, SyntaxSetBuilder, ParseState, ScopeStack, Scope};
-use syntect::highlighting::ScopeSelectors;
-use syntect::easy::{ScopeRegionIterator};
+use syntect::parsing::{SyntaxSet, SyntaxSetBuilder};
+use syntect::syntax_tests::{SyntaxTestFileResult, SyntaxTestOutputOptions, process_syntax_test_assertions, parse_syntax_test_header_line, SyntaxTestHeader};
 
 use std::path::Path;
+use std::io::prelude::*;
 use std::io::{BufRead, BufReader};
 use std::fs::File;
-use std::cmp::{min, max};
 use std::time::Instant;
-use std::str::FromStr;
 
 use getopts::Options;
-use regex::Regex;
 use walkdir::{DirEntry, WalkDir};
 
 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -31,238 +29,44 @@ pub enum SyntaxTestHeaderError {
     SyntaxDefinitionNotFound,
 }
 
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum SyntaxTestFileResult {
-    FailedAssertions(usize, usize),
-    Success(usize),
-}
-
-lazy_static! {
-    pub static ref SYNTAX_TEST_HEADER_PATTERN: Regex = Regex::new(r#"(?xm)
-            ^(?P<testtoken_start>\s*\S+)
-            \s+SYNTAX\sTEST\s+
-            "(?P<syntax_file>[^"]+)"
-            \s*(?P<testtoken_end>\S+)?$
-        "#).unwrap();
-    pub static ref SYNTAX_TEST_ASSERTION_PATTERN: Regex = Regex::new(r#"(?xm)
-        \s*(?:
-            (?P<begin_of_token><-)|(?P<range>\^+)
-        )(.*)$"#).unwrap();
-}
-
-#[derive(Clone, Copy)]
-struct OutputOptions {
-    time: bool,
-    debug: bool,
-    summary: bool,
-}
-
-#[derive(Debug)]
-struct AssertionRange<'a> {
-    begin_char: usize,
-    end_char: usize,
-    scope_selector_text: &'a str,
-    is_pure_assertion_line: bool,
-}
-
-#[derive(Debug)]
-struct ScopedText {
-    scope: Vec<Scope>,
-    char_start: usize,
-    text_len: usize,
-}
-
-#[derive(Debug)]
-struct RangeTestResult {
-    column_begin: usize,
-    column_end: usize,
-    success: bool,
-}
 
-fn get_line_assertion_details<'a>(testtoken_start: &str, testtoken_end: Option<&str>, line: &'a str) -> Option<AssertionRange<'a>> {
-    // if the test start token specified in the test file's header is on the line
-    if let Some(index) = line.find(testtoken_start) {
-        let (before_token_start, token_and_rest_of_line) = line.split_at(index);
-
-        if let Some(captures) = SYNTAX_TEST_ASSERTION_PATTERN.captures(&token_and_rest_of_line[testtoken_start.len()..]) {
-            let mut sst = captures.get(3).unwrap().as_str(); // get the scope selector text
-            let mut only_whitespace_after_token_end = true;
-
-            if let Some(token) = testtoken_end { // if there is an end token defined in the test file header
-                if let Some(end_token_pos) = sst.find(token) { // and there is an end token in the line
-                    let (ss, after_token_end) = sst.split_at(end_token_pos); // the scope selector text ends at the end token
-                    sst = &ss;
-                    only_whitespace_after_token_end = after_token_end.trim_end().is_empty();
-                }
-            }
-            return Some(AssertionRange {
-                begin_char: index + if captures.get(2).is_some() { testtoken_start.len() + captures.get(2).unwrap().start() } else { 0 },
-                end_char: index + if captures.get(2).is_some() { testtoken_start.len() + captures.get(2).unwrap().end() } else { 1 },
-                scope_selector_text: sst,
-                is_pure_assertion_line: before_token_start.trim_start().is_empty() && only_whitespace_after_token_end, // if only whitespace surrounds the test tokens on the line, then it is a pure assertion line
-            });
-        }
-    }
-    None
-}
-
-fn process_assertions(assertion: &AssertionRange<'_>, test_against_line_scopes: &Vec<ScopedText>) -> Vec<RangeTestResult> {
-    // format the scope selector to include a space at the beginning, because, currently, ScopeSelector expects excludes to begin with " -"
-    // and they are sometimes in the syntax test as ^^^-comment, for example
-    let selector = ScopeSelectors::from_str(&format!(" {}", &assertion.scope_selector_text)).unwrap();
-    // find the scope at the specified start column, and start matching the selector through the rest of the tokens on the line from there until the end column is reached
-    let mut results = Vec::new();
-    for scoped_text in test_against_line_scopes.iter().skip_while(|s|s.char_start + s.text_len <= assertion.begin_char).take_while(|s|s.char_start < assertion.end_char) {
-        let match_value = selector.does_match(scoped_text.scope.as_slice());
-        let result = RangeTestResult {
-            column_begin: max(scoped_text.char_start, assertion.begin_char),
-            column_end: min(scoped_text.char_start + scoped_text.text_len, assertion.end_char),
-            success: match_value.is_some()
-        };
-        results.push(result);
-    }
-    // don't ignore assertions after the newline, they should be treated as though they are asserting against the newline
-    let last = test_against_line_scopes.last().unwrap();
-    if last.char_start + last.text_len < assertion.end_char {
-        let match_value = selector.does_match(last.scope.as_slice());
-        let result = RangeTestResult {
-            column_begin: max(last.char_start + last.text_len, assertion.begin_char),
-            column_end: assertion.end_char,
-            success: match_value.is_some()
-        };
-        results.push(result);
-    }
-    results
-}
-
-/// If `parse_test_lines` is `false` then lines that only contain assertions are not parsed
-fn test_file(ss: &SyntaxSet, path: &Path, parse_test_lines: bool, out_opts: OutputOptions) -> Result<SyntaxTestFileResult, SyntaxTestHeaderError> {
-    use syntect::util::debug_print_ops;
+fn test_file(ss: &SyntaxSet, path: &Path, out_opts: SyntaxTestOutputOptions) -> Result<SyntaxTestFileResult, SyntaxTestHeaderError> {
     let f = File::open(path).unwrap();
     let mut reader = BufReader::new(f);
-    let mut line = String::new();
+    let mut header_line = String::new();
 
     // read the first line from the file - if we have reached EOF already, it's an invalid file
-    if reader.read_line(&mut line).unwrap() == 0 {
+    if reader.read_line(&mut header_line).unwrap() == 0 {
         return Err(SyntaxTestHeaderError::MalformedHeader);
     }
-
-    line = line.replace("\r", &"");
+    header_line = header_line.replace('\r', ""); // drop carriage returns
 
     // parse the syntax test header in the first line of the file
-    let header_line = line.clone();
-    let search_result = SYNTAX_TEST_HEADER_PATTERN.captures(&header_line);
-    let captures = search_result.ok_or(SyntaxTestHeaderError::MalformedHeader)?;
-
-    let testtoken_start = captures.name("testtoken_start").unwrap().as_str();
-    let testtoken_end = captures.name("testtoken_end").map_or(None, |c|Some(c.as_str()));
-    let syntax_file = captures.name("syntax_file").unwrap().as_str();
+    let SyntaxTestHeader { testtoken_start, testtoken_end, syntax_file, reindent_text: _ } = parse_syntax_test_header_line(&header_line).ok_or(SyntaxTestHeaderError::MalformedHeader)?;
 
     // find the relevant syntax definition to parse the file with - case is important!
     if !out_opts.summary {
-        println!("The test file references syntax definition file: {}", syntax_file);
+        println!("The test file references syntax definition file: {}", syntax_file); //" and the start test token is {} and the end token is {:?}", testtoken_start, testtoken_end);
     }
     let syntax = ss.find_syntax_by_path(syntax_file).ok_or(SyntaxTestHeaderError::SyntaxDefinitionNotFound)?;
 
-    // iterate over the lines of the file, testing them
-    let mut state = ParseState::new(syntax);
-    let mut stack = ScopeStack::new();
-
-    let mut current_line_number = 1;
-    let mut test_against_line_number = 1;
-    let mut scopes_on_line_being_tested = Vec::new();
-    let mut previous_non_assertion_line = line.to_string();
-
-    let mut assertion_failures: usize = 0;
-    let mut total_assertions: usize = 0;
-
-    loop { // over lines of file, starting with the header line
-        let mut line_only_has_assertion = false;
-        let mut line_has_assertion = false;
-        if let Some(assertion) = get_line_assertion_details(testtoken_start, testtoken_end, &line) {
-            let result = process_assertions(&assertion, &scopes_on_line_being_tested);
-            total_assertions += &assertion.end_char - &assertion.begin_char;
-            for failure in result.iter().filter(|r|!r.success) {
-                let length = failure.column_end - failure.column_begin;
-                let text: String = previous_non_assertion_line.chars().skip(failure.column_begin).take(length).collect();
-                if !out_opts.summary {
-                    println!("  Assertion selector {:?} \
-                        from line {:?} failed against line {:?}, column range {:?}-{:?} \
-                        (with text {:?}) \
-                        has scope {:?}",
-                        assertion.scope_selector_text.trim(),
-                        current_line_number, test_against_line_number, failure.column_begin, failure.column_end,
-                        text,
-                        scopes_on_line_being_tested.iter().skip_while(|s|s.char_start + s.text_len <= failure.column_begin).next().unwrap_or(scopes_on_line_being_tested.last().unwrap()).scope
-                    );
-                }
-                assertion_failures += failure.column_end - failure.column_begin;
-            }
-            line_only_has_assertion = assertion.is_pure_assertion_line;
-            line_has_assertion = true;
-        }
-        if !line_only_has_assertion || parse_test_lines {
-            if !line_has_assertion { // ST seems to ignore lines that have assertions when calculating which line the assertion tests against
-                scopes_on_line_being_tested.clear();
-                test_against_line_number = current_line_number;
-                previous_non_assertion_line = line.to_string();
-            }
-            if out_opts.debug && !line_only_has_assertion {
-                println!("-- debugging line {} -- scope stack: {:?}", current_line_number, stack);
-            }
-            let ops = state.parse_line(&line, &ss);
-            if out_opts.debug && !line_only_has_assertion {
-                if ops.is_empty() && !line.is_empty() {
-                    println!("no operations for this line...");
-                } else {
-                    debug_print_ops(&line, &ops);
-                }
-            }
-            let mut col: usize = 0;
-            for (s, op) in ScopeRegionIterator::new(&ops, &line) {
-                stack.apply(op);
-                if s.is_empty() { // in this case we don't care about blank tokens
-                    continue;
-                }
-                if !line_has_assertion {
-                    // if the line has no assertions on it, remember the scopes on the line so we can test against them later
-                    let len = s.chars().count();
-                    scopes_on_line_being_tested.push(
-                        ScopedText {
-                            char_start: col,
-                            text_len: len,
-                            scope: stack.as_slice().to_vec()
-                        }
-                    );
-                    // TODO: warn when there are duplicate adjacent (non-meta?) scopes, as it is almost always undesired
-                    col += len;
-                }
-            }
-        }
+    // the header line was already consumed from the reader, so the contents start with it
+    let mut contents = header_line.clone();
+    reader.read_to_string(&mut contents).expect("Unable to read file");
+    contents = contents.replace('\r', "");
 
-        line.clear();
-        current_line_number += 1;
-        if reader.read_line(&mut line).unwrap() == 0 {
-            break;
-        }
-        line = line.replace("\r", &"");
-    }
-    let res = if assertion_failures > 0 {
-        Ok(SyntaxTestFileResult::FailedAssertions(assertion_failures, total_assertions))
-    } else {
-        Ok(SyntaxTestFileResult::Success(total_assertions))
-    };
+    let res = process_syntax_test_assertions(&ss, &syntax, &contents, testtoken_start, testtoken_end, &out_opts);
 
     if out_opts.summary {
-        if let Ok(SyntaxTestFileResult::FailedAssertions(failures, _)) = res {
+        if let SyntaxTestFileResult::FailedAssertions(failures, _) = res {
             // Don't print total assertion count so that diffs don't pick up new succeeding tests
             println!("FAILED {}: {}", path.display(), failures);
         }
     } else {
         println!("{:?}", res);
     }
 
-    res
+    Ok(res)
 }
 
 fn main() {
@@ -271,6 +75,7 @@ fn main() {
     opts.optflag("d", "debug", "Show parsing results for each test line");
     opts.optflag("t", "time", "Time execution as a more broad-ranging benchmark");
     opts.optflag("s", "summary", "Print only summary of test failures");
+    opts.optflag("f", "failfast", "Stop at first failure");
 
     let matches = match opts.parse(&args[1..]) {
         Ok(m) => { m }
@@ -305,10 +110,11 @@ fn main() {
         ss = builder.build();
     }
 
-    let out_opts = OutputOptions {
+    let out_opts = SyntaxTestOutputOptions {
         debug: matches.opt_present("debug"),
         time: matches.opt_present("time"),
         summary: matches.opt_present("summary"),
+        failfast: matches.opt_present("failfast"),
     };
 
     let exit_code = recursive_walk(&ss, &tests_path, out_opts);
@@ -318,7 +124,7 @@ fn main() {
 }
 
 
-fn recursive_walk(ss: &SyntaxSet, path: &str, out_opts: OutputOptions) -> i32 {
+fn recursive_walk(ss: &SyntaxSet, path: &str, out_opts: SyntaxTestOutputOptions) -> i32 {
     let mut exit_code: i32 = 0; // exit with code 0 by default, if all tests pass
     let walker = WalkDir::new(path).into_iter();
 
@@ -337,18 +143,25 @@ fn recursive_walk(ss: &SyntaxSet, path: &str, out_opts: OutputOptions) -> i32 {
             println!("Testing file {}", path.display());
         }
         let start = Instant::now();
-        let result = test_file(&ss, path, true, out_opts);
+        let result = test_file(&ss, path, out_opts);
         let elapsed = start.elapsed();
         if out_opts.time {
             let ms = (elapsed.as_secs() * 1_000) + (elapsed.subsec_nanos() / 1_000_000) as u64;
             println!("{} ms for file {}", ms, path.display());
         }
         if exit_code != 2 { // leave exit code 2 if there was an error
             if let Err(_) = result { // set exit code 2 if there was an error
+                println!("{:?}", result);
                 exit_code = 2;
+                if out_opts.failfast {
+                    break;
+                }
             } else if let Ok(ok) = result {
                 if let SyntaxTestFileResult::FailedAssertions(_, _) = ok {
                     exit_code = 1; // otherwise, if there were failures, exit with code 1
+                    if out_opts.failfast {
+                        break;
+                    }
                 }
             }
         }

diff --git a/src/lib.rs b/src/lib.rs
@@ -27,6 +27,8 @@ extern crate pretty_assertions;
 pub mod dumps;
 #[cfg(feature = "parsing")]
 pub mod easy;
+#[cfg(feature = "parsing")]
+pub mod syntax_tests;
 #[cfg(feature = "html")]
 mod escape;
 pub mod highlighting;