Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initial rework of syntest to be usable internally #185

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ serde_json = "1.0"
[dev-dependencies]
criterion = "0.3"
rayon = "1.0.0"
regex = "1.0"
getopts = "0.2"
pretty_assertions = "0.6"

Expand Down
249 changes: 31 additions & 218 deletions examples/syntest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,20 @@
// you can tell it where to parse them from - the following will execute only 1 syntax test after
// parsing the sublime-syntax files in the JavaScript folder:
// cargo run --example syntest testdata/Packages/JavaScript/syntax_test_json.json testdata/Packages/JavaScript/
#[macro_use]
extern crate lazy_static;
extern crate syntect;
extern crate walkdir;
extern crate getopts;

use syntect::parsing::{SyntaxSet, SyntaxSetBuilder, ParseState, ScopeStack, Scope};
use syntect::highlighting::ScopeSelectors;
use syntect::easy::{ScopeRegionIterator};
use syntect::parsing::{SyntaxSet, SyntaxSetBuilder};
use syntect::syntax_tests::{SyntaxTestFileResult, SyntaxTestOutputOptions, process_syntax_test_assertions, parse_syntax_test_header_line, SyntaxTestHeader};

use std::path::Path;
use std::io::prelude::*;
use std::io::{BufRead, BufReader};
use std::fs::File;
use std::cmp::{min, max};
use std::time::Instant;
use std::str::FromStr;

use getopts::Options;
use regex::Regex;
use walkdir::{DirEntry, WalkDir};

#[derive(Debug, Clone, PartialEq, Eq)]
Expand All @@ -31,238 +29,44 @@ pub enum SyntaxTestHeaderError {
SyntaxDefinitionNotFound,
}

/// Outcome of checking the assertions found in one syntax test file.
///
/// The counts are measured in asserted columns, not in assertion lines: each
/// assertion contributes `end_char - begin_char` to the total.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxTestFileResult {
/// At least one asserted column failed: `(failed columns, total asserted columns)`.
FailedAssertions(usize, usize),
/// Every asserted column matched; the payload is the total number of asserted columns.
Success(usize),
}

lazy_static! {
// Matches the header line of a syntax test file, which has the shape
//   <start token> SYNTAX TEST "<syntax file>" [<end token>]
// Named groups:
//   testtoken_start - any leading whitespace plus the first run of non-whitespace characters
//   syntax_file     - the text between the double quotes
//   testtoken_end   - optional run of non-whitespace characters after the closing quote
// The pattern is compiled in verbose (`x`) and multi-line (`m`) mode.
pub static ref SYNTAX_TEST_HEADER_PATTERN: Regex = Regex::new(r#"(?xm)
^(?P<testtoken_start>\s*\S+)
\s+SYNTAX\sTEST\s+
"(?P<syntax_file>[^"]+)"
\s*(?P<testtoken_end>\S+)?$
"#).unwrap();
// Matches the part of an assertion line that follows the start token:
// optional whitespace, then either `<-` (group 1, `begin_of_token`) or a run
// of one or more `^` (group 2, `range`), then the rest of the line (group 3),
// which holds the scope selector text and possibly the end token.
pub static ref SYNTAX_TEST_ASSERTION_PATTERN: Regex = Regex::new(r#"(?xm)
\s*(?:
(?P<begin_of_token><-)|(?P<range>\^+)
)(.*)$"#).unwrap();
}

/// Flags controlling what gets printed while test files are processed.
#[derive(Clone, Copy)]
struct OutputOptions {
// when set, the elapsed time in milliseconds is printed for each test file
time: bool,
// when set, the scope stack and parse operations are printed for each parsed line
// (lines that only contain an assertion are left out)
debug: bool,
// when set, per-file details are suppressed and only a `FAILED <path>: <count>` line
// is printed for files that have failing assertions
summary: bool,
}

/// A single assertion extracted from a line of a syntax test file.
#[derive(Debug)]
struct AssertionRange<'a> {
// first asserted column (inclusive)
begin_char: usize,
// column just past the last asserted column (exclusive)
end_char: usize,
// the selector text that follows the `<-` / `^^^` marker, up to the end token if the line has one
scope_selector_text: &'a str,
// true when only whitespace surrounds the test tokens on the line
is_pure_assertion_line: bool,
}

/// A non-empty piece of text on the line being tested, together with the
/// scope stack that was in effect for it.
#[derive(Debug)]
struct ScopedText {
// snapshot of the scope stack for this piece of text
scope: Vec<Scope>,
// column at which the text starts, counted in chars from the start of the line
char_start: usize,
// length of the text, counted in chars
text_len: usize,
}

/// Result of testing an assertion's selector against one stretch of columns
/// that share the same scope stack.
#[derive(Debug)]
struct RangeTestResult {
// first column covered by this result (inclusive)
column_begin: usize,
// column just past the last one covered by this result (exclusive)
column_end: usize,
// whether the selector matched the scope stack for these columns
success: bool,
}

/// Extracts the assertion, if any, carried by one line of a syntax test file.
///
/// * `testtoken_start` - the start token taken from the test file's header.
/// * `testtoken_end` - the optional end token taken from the test file's header.
/// * `line` - the line to inspect.
///
/// Returns `None` when the line does not contain the start token, or when the
/// start token is not followed by a `<-` or `^^^` marker. Otherwise returns
/// the asserted column range, the selector text and whether the line holds
/// nothing but the assertion.
///
/// Columns are counted in chars (not bytes) so that they line up with the
/// char-based columns recorded for the line being tested.
fn get_line_assertion_details<'a>(testtoken_start: &str, testtoken_end: Option<&str>, line: &'a str) -> Option<AssertionRange<'a>> {
    // the assertion follows the first occurrence of the start token on the line
    let index = line.find(testtoken_start)?;
    let (before_token_start, token_and_rest_of_line) = line.split_at(index);
    let after_token_start = &token_and_rest_of_line[testtoken_start.len()..];
    let captures = SYNTAX_TEST_ASSERTION_PATTERN.captures(after_token_start)?;

    // group 3 is everything after the marker, i.e. the scope selector text
    let mut sst = captures.get(3).map_or("", |m| m.as_str());
    let mut only_whitespace_after_token_end = true;

    if let Some(token) = testtoken_end { // if there is an end token defined in the test file header
        if let Some(end_token_pos) = sst.find(token) { // and there is an end token in the line
            // the scope selector text ends at the end token
            let (selector_text, token_and_after) = sst.split_at(end_token_pos);
            sst = selector_text;
            // skip over the end token itself, otherwise the remainder can never be blank
            only_whitespace_after_token_end = token_and_after[token.len()..].trim_end().is_empty();
        }
    }

    // convert byte offsets into char columns
    let token_column = before_token_start.chars().count();
    let (begin_char, end_char) = match captures.get(2) {
        // `^^^`: the range covers exactly the columns under the carets
        Some(range) => {
            let begin = token_column
                + testtoken_start.chars().count()
                + after_token_start[..range.start()].chars().count();
            (begin, begin + range.as_str().chars().count())
        }
        // `<-`: the assertion targets the column where the start token begins
        None => (token_column, token_column + 1),
    };

    Some(AssertionRange {
        begin_char,
        end_char,
        scope_selector_text: sst,
        // if only whitespace surrounds the test tokens on the line, then it is a pure assertion line
        is_pure_assertion_line: before_token_start.trim_start().is_empty() && only_whitespace_after_token_end,
    })
}

/// Tests one assertion against the scoped text of the line it refers to.
///
/// * `assertion` - the asserted column range and its selector text.
/// * `test_against_line_scopes` - the scoped pieces of text of the line being
///   tested, in column order.
///
/// Returns one `RangeTestResult` per piece of scoped text that overlaps the
/// asserted range, clipped to that range. If the range extends past the end
/// of the line, one extra result covers the overhang, tested against the
/// scope of the last piece of text. When there is no scoped text at all, no
/// results are produced.
///
/// # Panics
///
/// Panics if the assertion's selector text cannot be parsed as scope selectors.
fn process_assertions(assertion: &AssertionRange<'_>, test_against_line_scopes: &[ScopedText]) -> Vec<RangeTestResult> {
    // format the scope selector to include a space at the beginning, because, currently, ScopeSelector expects excludes to begin with " -"
    // and they are sometimes in the syntax test as ^^^-comment, for example
    let selector = ScopeSelectors::from_str(&format!(" {}", assertion.scope_selector_text))
        .expect("syntax test assertion contains an invalid scope selector");

    // find the scope at the specified start column, and match the selector against every
    // piece of text from there until the end column is reached
    let mut results: Vec<RangeTestResult> = test_against_line_scopes
        .iter()
        .skip_while(|s| s.char_start + s.text_len <= assertion.begin_char)
        .take_while(|s| s.char_start < assertion.end_char)
        .map(|scoped_text| RangeTestResult {
            column_begin: max(scoped_text.char_start, assertion.begin_char),
            column_end: min(scoped_text.char_start + scoped_text.text_len, assertion.end_char),
            success: selector.does_match(scoped_text.scope.as_slice()).is_some(),
        })
        .collect();

    // don't ignore assertions after the newline, they should be treated as though they are asserting against the newline
    if let Some(last) = test_against_line_scopes.last() {
        let line_end = last.char_start + last.text_len;
        if line_end < assertion.end_char {
            results.push(RangeTestResult {
                column_begin: max(line_end, assertion.begin_char),
                column_end: assertion.end_char,
                success: selector.does_match(last.scope.as_slice()).is_some(),
            });
        }
    }
    results
}

/// If `parse_test_lines` is `false` then lines that only contain assertions are not parsed
fn test_file(ss: &SyntaxSet, path: &Path, parse_test_lines: bool, out_opts: OutputOptions) -> Result<SyntaxTestFileResult, SyntaxTestHeaderError> {
use syntect::util::debug_print_ops;
fn test_file(ss: &SyntaxSet, path: &Path, out_opts: SyntaxTestOutputOptions) -> Result<SyntaxTestFileResult, SyntaxTestHeaderError> {
let f = File::open(path).unwrap();
let mut reader = BufReader::new(f);
let mut line = String::new();
let mut header_line = String::new();

// read the first line from the file - if we have reached EOF already, it's an invalid file
if reader.read_line(&mut line).unwrap() == 0 {
if reader.read_line(&mut header_line).unwrap() == 0 {
return Err(SyntaxTestHeaderError::MalformedHeader);
}

line = line.replace("\r", &"");
header_line = header_line.replace("\r", &"");

// parse the syntax test header in the first line of the file
let header_line = line.clone();
let search_result = SYNTAX_TEST_HEADER_PATTERN.captures(&header_line);
let captures = search_result.ok_or(SyntaxTestHeaderError::MalformedHeader)?;

let testtoken_start = captures.name("testtoken_start").unwrap().as_str();
let testtoken_end = captures.name("testtoken_end").map_or(None, |c|Some(c.as_str()));
let syntax_file = captures.name("syntax_file").unwrap().as_str();
let SyntaxTestHeader { testtoken_start, testtoken_end, syntax_file, reindent_text: _ } = parse_syntax_test_header_line(&header_line).ok_or(SyntaxTestHeaderError::MalformedHeader)?;

// find the relevant syntax definition to parse the file with - case is important!
if !out_opts.summary {
println!("The test file references syntax definition file: {}", syntax_file);
println!("The test file references syntax definition file: {}", syntax_file); //" and the start test token is {} and the end token is {:?}", testtoken_start, testtoken_end);
}
let syntax = ss.find_syntax_by_path(syntax_file).ok_or(SyntaxTestHeaderError::SyntaxDefinitionNotFound)?;

// iterate over the lines of the file, testing them
let mut state = ParseState::new(syntax);
let mut stack = ScopeStack::new();

let mut current_line_number = 1;
let mut test_against_line_number = 1;
let mut scopes_on_line_being_tested = Vec::new();
let mut previous_non_assertion_line = line.to_string();

let mut assertion_failures: usize = 0;
let mut total_assertions: usize = 0;

loop { // over lines of file, starting with the header line
let mut line_only_has_assertion = false;
let mut line_has_assertion = false;
if let Some(assertion) = get_line_assertion_details(testtoken_start, testtoken_end, &line) {
let result = process_assertions(&assertion, &scopes_on_line_being_tested);
total_assertions += &assertion.end_char - &assertion.begin_char;
for failure in result.iter().filter(|r|!r.success) {
let length = failure.column_end - failure.column_begin;
let text: String = previous_non_assertion_line.chars().skip(failure.column_begin).take(length).collect();
if !out_opts.summary {
println!(" Assertion selector {:?} \
from line {:?} failed against line {:?}, column range {:?}-{:?} \
(with text {:?}) \
has scope {:?}",
assertion.scope_selector_text.trim(),
current_line_number, test_against_line_number, failure.column_begin, failure.column_end,
text,
scopes_on_line_being_tested.iter().skip_while(|s|s.char_start + s.text_len <= failure.column_begin).next().unwrap_or(scopes_on_line_being_tested.last().unwrap()).scope
);
}
assertion_failures += failure.column_end - failure.column_begin;
}
line_only_has_assertion = assertion.is_pure_assertion_line;
line_has_assertion = true;
}
if !line_only_has_assertion || parse_test_lines {
if !line_has_assertion { // ST seems to ignore lines that have assertions when calculating which line the assertion tests against
scopes_on_line_being_tested.clear();
test_against_line_number = current_line_number;
previous_non_assertion_line = line.to_string();
}
if out_opts.debug && !line_only_has_assertion {
println!("-- debugging line {} -- scope stack: {:?}", current_line_number, stack);
}
let ops = state.parse_line(&line, &ss);
if out_opts.debug && !line_only_has_assertion {
if ops.is_empty() && !line.is_empty() {
println!("no operations for this line...");
} else {
debug_print_ops(&line, &ops);
}
}
let mut col: usize = 0;
for (s, op) in ScopeRegionIterator::new(&ops, &line) {
stack.apply(op);
if s.is_empty() { // in this case we don't care about blank tokens
continue;
}
if !line_has_assertion {
// if the line has no assertions on it, remember the scopes on the line so we can test against them later
let len = s.chars().count();
scopes_on_line_being_tested.push(
ScopedText {
char_start: col,
text_len: len,
scope: stack.as_slice().to_vec()
}
);
// TODO: warn when there are duplicate adjacent (non-meta?) scopes, as it is almost always undesired
col += len;
}
}
}
let mut contents = String::new();
contents.push_str(&header_line);
reader.read_to_string(&mut contents).expect("Unable to read file");
contents = contents.replace("\r", &"");

line.clear();
current_line_number += 1;
if reader.read_line(&mut line).unwrap() == 0 {
break;
}
line = line.replace("\r", &"");
}
let res = if assertion_failures > 0 {
Ok(SyntaxTestFileResult::FailedAssertions(assertion_failures, total_assertions))
} else {
Ok(SyntaxTestFileResult::Success(total_assertions))
};
let res = process_syntax_test_assertions(&ss, &syntax, &contents, testtoken_start, testtoken_end, &out_opts);

if out_opts.summary {
if let Ok(SyntaxTestFileResult::FailedAssertions(failures, _)) = res {
if let SyntaxTestFileResult::FailedAssertions(failures, _) = res {
// Don't print total assertion count so that diffs don't pick up new succeeding tests
println!("FAILED {}: {}", path.display(), failures);
}
} else {
println!("{:?}", res);
}

res
Ok(res)
}

fn main() {
Expand All @@ -271,6 +75,7 @@ fn main() {
opts.optflag("d", "debug", "Show parsing results for each test line");
opts.optflag("t", "time", "Time execution as a more broad-ranging benchmark");
opts.optflag("s", "summary", "Print only summary of test failures");
opts.optflag("f", "failfast", "Stop at first failure");

let matches = match opts.parse(&args[1..]) {
Ok(m) => { m }
Expand Down Expand Up @@ -305,10 +110,11 @@ fn main() {
ss = builder.build();
}

let out_opts = OutputOptions {
let out_opts = SyntaxTestOutputOptions {
debug: matches.opt_present("debug"),
time: matches.opt_present("time"),
summary: matches.opt_present("summary"),
failfast: matches.opt_present("failfast"),
};

let exit_code = recursive_walk(&ss, &tests_path, out_opts);
Expand All @@ -318,7 +124,7 @@ fn main() {
}


fn recursive_walk(ss: &SyntaxSet, path: &str, out_opts: OutputOptions) -> i32 {
fn recursive_walk(ss: &SyntaxSet, path: &str, out_opts: SyntaxTestOutputOptions) -> i32 {
let mut exit_code: i32 = 0; // exit with code 0 by default, if all tests pass
let walker = WalkDir::new(path).into_iter();

Expand All @@ -337,18 +143,25 @@ fn recursive_walk(ss: &SyntaxSet, path: &str, out_opts: OutputOptions) -> i32 {
println!("Testing file {}", path.display());
}
let start = Instant::now();
let result = test_file(&ss, path, true, out_opts);
let result = test_file(&ss, path, out_opts);
let elapsed = start.elapsed();
if out_opts.time {
let ms = (elapsed.as_secs() * 1_000) + (elapsed.subsec_nanos() / 1_000_000) as u64;
println!("{} ms for file {}", ms, path.display());
}
if exit_code != 2 { // leave exit code 2 if there was an error
if let Err(_) = result { // set exit code 2 if there was an error
println!("{:?}", result);
exit_code = 2;
if out_opts.failfast {
break;
}
} else if let Ok(ok) = result {
if let SyntaxTestFileResult::FailedAssertions(_, _) = ok {
exit_code = 1; // otherwise, if there were failures, exit with code 1
if out_opts.failfast {
break;
}
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ extern crate pretty_assertions;
pub mod dumps;
#[cfg(feature = "parsing")]
pub mod easy;
#[cfg(feature = "parsing")]
pub mod syntax_tests;
#[cfg(feature = "html")]
mod escape;
pub mod highlighting;
Expand Down