Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initial rework of syntest to be usable internally #185

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
218 changes: 17 additions & 201 deletions examples/syntest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,14 @@ extern crate regex;
extern crate getopts;

//extern crate onig;
use syntect::parsing::{SyntaxSet, ParseState, ScopeStack, Scope};
use syntect::highlighting::ScopeSelectors;
use syntect::easy::{ScopeRegionIterator};
use syntect::parsing::{SyntaxSet};
use syntect::easy::{SyntaxTestFileResult, SyntaxTestOutputOptions, process_syntax_test_assertions};
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this should go in the easy module.

That's for simple wrappers for common use cases. Syntax tests are neither a wrapper around a more advanced API, nor a common use case.

They should probably go in their own `syntax_tests` module or something similar.


use std::path::Path;
use std::io::prelude::*;
use std::io::{BufRead, BufReader};
use std::fs::File;
use std::cmp::{min, max};
use std::time::Instant;
use std::str::FromStr;

use getopts::Options;
use regex::Regex;
Expand All @@ -36,126 +34,26 @@ pub enum SyntaxTestHeaderError {
SyntaxDefinitionNotFound,
}

/// Outcome of checking every assertion found in one syntax test file.
///
/// Counts are per asserted column: each assertion contributes the width of its
/// column range (`end_char - begin_char`) to the totals.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxTestFileResult {
/// At least one assertion failed: `(failed assertion count, total assertion count)`.
FailedAssertions(usize, usize),
/// No assertion failed; carries the total assertion count.
Success(usize),
}

// Regular expressions used to recognise the syntax test header and the assertion markers.
// Both use the `x` flag, so literal whitespace inside the patterns is insignificant and
// whitespace that must be matched is spelled `\s`.
lazy_static! {
// Matches the header line of a syntax test file:
//   <testtoken_start> SYNTAX TEST "<syntax_file>" [<testtoken_end>]
// `testtoken_start` is the leading (optionally indented) run of non-whitespace characters,
// `syntax_file` is the quoted text, and `testtoken_end` is an optional trailing token.
pub static ref SYNTAX_TEST_HEADER_PATTERN: Regex = Regex::new(r#"(?xm)
^(?P<testtoken_start>\s*\S+)
\s+SYNTAX\sTEST\s+
"(?P<syntax_file>[^"]+)"
\s*(?P<testtoken_end>\S+)?$
"#).unwrap();
// Matches an assertion marker in the text that follows the test start token:
// group 1 (`begin_of_token`) is a `<-` marker, group 2 (`range`) is a run of one or more `^`,
// and the unnamed group 3 is the remainder of the line, which
// `get_line_assertion_details` reads as the scope selector text.
pub static ref SYNTAX_TEST_ASSERTION_PATTERN: Regex = Regex::new(r#"(?xm)
\s*(?:
(?P<begin_of_token><-)|(?P<range>\^+)
)(.*)$"#).unwrap();
}

/// Flags controlling what the syntax test runner prints; `main` fills them from the
/// command-line options of the same names.
#[derive(Clone, Copy)]
struct OutputOptions {
// when set, the elapsed time of each tested file is computed (presumably to be printed - confirm in `recursive_walk`)
time: bool,
// when set, the scope stack and the parse operations are printed for each parsed line
debug: bool,
// when set, per-assertion details are suppressed and only a `FAILED <path>: <count>` line is printed for failing files
summary: bool,
}

/// A single assertion parsed from a line of a syntax test file.
#[derive(Debug)]
struct AssertionRange<'a> {
// first column (inclusive) of the line under test that the assertion applies to
begin_char: usize,
// column at which the asserted range stops (exclusive)
end_char: usize,
// selector text following the `<-` / `^^^` marker, borrowed from the assertion line
scope_selector_text: &'a str,
// true when only whitespace surrounds the test tokens on the line
is_pure_assertion_line: bool,
}

/// A non-empty token of the line being tested, together with the scopes that apply to it.
#[derive(Debug)]
struct ScopedText {
// snapshot of the scope stack at this token
scope: Vec<Scope>,
// column, counted in chars, at which the token starts
char_start: usize,
// length of the token, counted in chars
text_len: usize,
}

/// Result of matching an assertion's selector against one token within the asserted range.
#[derive(Debug)]
struct RangeTestResult {
// first column (inclusive) covered by this result, clamped to the asserted range
column_begin: usize,
// column (exclusive) at which this result ends, clamped to the asserted range
column_end: usize,
// whether the selector matched the scopes of the covered text
success: bool,
}

fn get_line_assertion_details<'a>(testtoken_start: &str, testtoken_end: Option<&str>, line: &'a str) -> Option<AssertionRange<'a>> {
// if the test start token specified in the test file's header is on the line
if let Some(index) = line.find(testtoken_start) {
let (before_token_start, token_and_rest_of_line) = line.split_at(index);

if let Some(captures) = SYNTAX_TEST_ASSERTION_PATTERN.captures(&token_and_rest_of_line[testtoken_start.len()..]) {
let mut sst = captures.get(3).unwrap().as_str(); // get the scope selector text
let mut only_whitespace_after_token_end = true;

if let Some(token) = testtoken_end { // if there is an end token defined in the test file header
if let Some(end_token_pos) = sst.find(token) { // and there is an end token in the line
let (ss, after_token_end) = sst.split_at(end_token_pos); // the scope selector text ends at the end token
sst = &ss;
only_whitespace_after_token_end = after_token_end.trim_right().is_empty();
}
}
return Some(AssertionRange {
begin_char: index + if captures.get(2).is_some() { testtoken_start.len() + captures.get(2).unwrap().start() } else { 0 },
end_char: index + if captures.get(2).is_some() { testtoken_start.len() + captures.get(2).unwrap().end() } else { 1 },
scope_selector_text: sst,
is_pure_assertion_line: before_token_start.trim_left().is_empty() && only_whitespace_after_token_end, // if only whitespace surrounds the test tokens on the line, then it is a pure assertion line
});
}
}
None
}

/// Checks one assertion against the scoped tokens of the line it refers to.
///
/// * `assertion` - the parsed assertion (column range and selector text).
/// * `test_against_line_scopes` - the tokens of the line under test, in column order.
///
/// Returns one `RangeTestResult` per token overlapping the asserted range, each clamped
/// to that range. If the range extends past the last token, the remainder is tested
/// against the last token's scopes. If no tokens were recorded at all, the whole range
/// is reported as a single failed result, since there is nothing it could match.
///
/// # Panics
///
/// Panics if the assertion's selector text cannot be parsed as scope selectors.
fn process_assertions(assertion: &AssertionRange, test_against_line_scopes: &[ScopedText]) -> Vec<RangeTestResult> {
    // format the scope selector to include a space at the beginning, because, currently, ScopeSelector expects excludes to begin with " -"
    // and they are sometimes in the syntax test as ^^^-comment, for example
    let selector = ScopeSelectors::from_str(&format!(" {}", &assertion.scope_selector_text)).unwrap();

    let last = match test_against_line_scopes.last() {
        Some(last) => last,
        // nothing was recorded for the line under test: fail the assertion instead of panicking
        None => {
            return vec![RangeTestResult {
                column_begin: assertion.begin_char,
                column_end: assertion.end_char,
                success: false,
            }];
        }
    };

    // find the token at the specified start column, and match the selector against every
    // token from there until the end column is reached
    let mut results: Vec<RangeTestResult> = test_against_line_scopes
        .iter()
        .skip_while(|s| s.char_start + s.text_len <= assertion.begin_char)
        .take_while(|s| s.char_start < assertion.end_char)
        .map(|scoped_text| RangeTestResult {
            column_begin: max(scoped_text.char_start, assertion.begin_char),
            column_end: min(scoped_text.char_start + scoped_text.text_len, assertion.end_char),
            success: selector.does_match(scoped_text.scope.as_slice()).is_some(),
        })
        .collect();

    // don't ignore assertions after the newline, they should be treated as though they are asserting against the newline
    let line_end = last.char_start + last.text_len;
    if line_end < assertion.end_char {
        results.push(RangeTestResult {
            column_begin: max(line_end, assertion.begin_char),
            column_end: assertion.end_char,
            success: selector.does_match(last.scope.as_slice()).is_some(),
        });
    }
    results
}

/// If `parse_test_lines` is `false` then lines that only contain assertions are not parsed
fn test_file(ss: &SyntaxSet, path: &Path, parse_test_lines: bool, out_opts: OutputOptions) -> Result<SyntaxTestFileResult, SyntaxTestHeaderError> {
use syntect::util::debug_print_ops;
fn test_file(ss: &SyntaxSet, path: &Path, out_opts: SyntaxTestOutputOptions) -> Result<SyntaxTestFileResult, SyntaxTestHeaderError> {
let f = File::open(path).unwrap();
let mut reader = BufReader::new(f);
let mut line = String::new();
let mut header_line = String::new();

// read the first line from the file - if we have reached EOF already, it's an invalid file
if reader.read_line(&mut line).unwrap() == 0 {
if reader.read_line(&mut header_line).unwrap() == 0 {
return Err(SyntaxTestHeaderError::MalformedHeader);
}

line = line.replace("\r", &"");

// parse the syntax test header in the first line of the file
let header_line = line.clone();
let search_result = SYNTAX_TEST_HEADER_PATTERN.captures(&header_line);
let captures = search_result.ok_or(SyntaxTestHeaderError::MalformedHeader)?;

Expand All @@ -165,109 +63,27 @@ fn test_file(ss: &SyntaxSet, path: &Path, parse_test_lines: bool, out_opts: Outp

// find the relevant syntax definition to parse the file with - case is important!
if !out_opts.summary {
println!("The test file references syntax definition file: {}", syntax_file);
println!("The test file references syntax definition file: {}", syntax_file); //" and the start test token is {} and the end token is {:?}", testtoken_start, testtoken_end);
}
let syntax = ss.find_syntax_by_path(syntax_file).ok_or(SyntaxTestHeaderError::SyntaxDefinitionNotFound)?;

// iterate over the lines of the file, testing them
let mut state = ParseState::new(syntax);
let mut stack = ScopeStack::new();
let mut contents = String::new();
contents.push_str(&header_line);
reader.read_to_string(&mut contents).expect("Unable to read file");
contents = contents.replace("\r", &"");

let mut current_line_number = 1;
let mut test_against_line_number = 1;
let mut scopes_on_line_being_tested = Vec::new();
let mut previous_non_assertion_line = line.to_string();

let mut assertion_failures: usize = 0;
let mut total_assertions: usize = 0;

loop { // over lines of file, starting with the header line
let mut line_only_has_assertion = false;
let mut line_has_assertion = false;
if let Some(assertion) = get_line_assertion_details(testtoken_start, testtoken_end, &line) {
let result = process_assertions(&assertion, &scopes_on_line_being_tested);
total_assertions += &assertion.end_char - &assertion.begin_char;
for failure in result.iter().filter(|r|!r.success) {
let length = failure.column_end - failure.column_begin;
let text: String = previous_non_assertion_line.chars().skip(failure.column_begin).take(length).collect();
if !out_opts.summary {
println!(" Assertion selector {:?} \
from line {:?} failed against line {:?}, column range {:?}-{:?} \
(with text {:?}) \
has scope {:?}",
assertion.scope_selector_text.trim(),
current_line_number, test_against_line_number, failure.column_begin, failure.column_end,
text,
scopes_on_line_being_tested.iter().skip_while(|s|s.char_start + s.text_len <= failure.column_begin).next().unwrap_or(scopes_on_line_being_tested.last().unwrap()).scope
);
}
assertion_failures += failure.column_end - failure.column_begin;
}
line_only_has_assertion = assertion.is_pure_assertion_line;
line_has_assertion = true;
}
if !line_only_has_assertion || parse_test_lines {
if !line_has_assertion { // ST seems to ignore lines that have assertions when calculating which line the assertion tests against
scopes_on_line_being_tested.clear();
test_against_line_number = current_line_number;
previous_non_assertion_line = line.to_string();
}
if out_opts.debug && !line_only_has_assertion {
println!("-- debugging line {} -- scope stack: {:?}", current_line_number, stack);
}
let ops = state.parse_line(&line);
if out_opts.debug && !line_only_has_assertion {
if ops.is_empty() && !line.is_empty() {
println!("no operations for this line...");
} else {
debug_print_ops(&line, &ops);
}
}
let mut col: usize = 0;
for (s, op) in ScopeRegionIterator::new(&ops, &line) {
stack.apply(op);
if s.is_empty() { // in this case we don't care about blank tokens
continue;
}
if !line_has_assertion {
// if the line has no assertions on it, remember the scopes on the line so we can test against them later
let len = s.chars().count();
scopes_on_line_being_tested.push(
ScopedText {
char_start: col,
text_len: len,
scope: stack.as_slice().to_vec()
}
);
// TODO: warn when there are duplicate adjacent (non-meta?) scopes, as it is almost always undesired
col += len;
}
}
}

line.clear();
current_line_number += 1;
if reader.read_line(&mut line).unwrap() == 0 {
break;
}
line = line.replace("\r", &"");
}
let res = if assertion_failures > 0 {
Ok(SyntaxTestFileResult::FailedAssertions(assertion_failures, total_assertions))
} else {
Ok(SyntaxTestFileResult::Success(total_assertions))
};
let res = process_syntax_test_assertions(&syntax, &contents, testtoken_start, testtoken_end, &out_opts);

if out_opts.summary {
if let Ok(SyntaxTestFileResult::FailedAssertions(failures, _)) = res {
if let SyntaxTestFileResult::FailedAssertions(failures, _) = res {
// Don't print total assertion count so that diffs don't pick up new succeeding tests
println!("FAILED {}: {}", path.display(), failures);
}
} else {
println!("{:?}", res);
}

res
Ok(res)
}

fn main() {
Expand Down Expand Up @@ -309,7 +125,7 @@ fn main() {
ss.link_syntaxes();
}

let out_opts = OutputOptions {
let out_opts = SyntaxTestOutputOptions {
debug: matches.opt_present("debug"),
time: matches.opt_present("time"),
summary: matches.opt_present("summary"),
Expand All @@ -322,7 +138,7 @@ fn main() {
}


fn recursive_walk(ss: &SyntaxSet, path: &str, out_opts: OutputOptions) -> i32 {
fn recursive_walk(ss: &SyntaxSet, path: &str, out_opts: SyntaxTestOutputOptions) -> i32 {
let mut exit_code: i32 = 0; // exit with code 0 by default, if all tests pass
let walker = WalkDir::new(path).into_iter();

Expand All @@ -341,7 +157,7 @@ fn recursive_walk(ss: &SyntaxSet, path: &str, out_opts: OutputOptions) -> i32 {
println!("Testing file {}", path.display());
}
let start = Instant::now();
let result = test_file(&ss, path, true, out_opts);
let result = test_file(&ss, path, out_opts);
let elapsed = start.elapsed();
if out_opts.time {
let ms = (elapsed.as_secs() * 1_000) + (elapsed.subsec_nanos() / 1_000_000) as u64;
Expand Down