src/parsing/parser.rs

// Suppression of a false positive clippy lint. Upstream issue:
//
//   mutable_key_type false positive for raw pointers
//   https://github.com/rust-lang/rust-clippy/issues/6745
//
// We use `*const MatchPattern` as key in our `SearchCache` hash map.
// Clippy thinks this is a problem since `MatchPattern` has interior mutability
// via `MatchPattern::regex::regex` which is an `AtomicLazyCell`.
// But raw pointers are hashed via the pointer itself, not what is pointed to.
// See https://github.com/rust-lang/rust/blob/1.54.0/library/core/src/hash/mod.rs#L717-L725
#![allow(clippy::mutable_key_type)]

use super::syntax_definition::*;
use super::scope::*;
use super::regex::Region;
use std::usize;
use std::collections::HashMap;
use std::i32;
use std::hash::BuildHasherDefault;
use fnv::FnvHasher;
use crate::parsing::syntax_set::{SyntaxSet, SyntaxReference};
use crate::parsing::syntax_definition::ContextId;

/// Errors that can occur while parsing.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum ParsingError {
    #[error("Somehow main context was popped from the stack")]
    MissingMainContext,
    /// A context is missing. Usually caused by a syntax referencing a another
    /// syntax that is not known to syntect. See e.g. <https://github.com/trishume/syntect/issues/421>
    #[error("Missing context with ID '{0:?}'")]
    MissingContext(ContextId),
    #[error("Bad index to match_at: {0}")]
    BadMatchIndex(usize),
    #[error("Tried to use a ContextReference that has not bee resolved yet: {0:?}")]
    UnresolvedContextReference(ContextReference),
}

/// Keeps the current parser state (the internal syntax interpreter stack) between lines of parsing.
///
/// If you are parsing an entire file you create one of these at the start and use it
/// all the way to the end.
///
/// # Caching
///
/// One reason this is exposed is that since it implements `Clone` you can actually cache
/// these (probably along with a [`HighlightState`]) and only re-start parsing from the point of a change.
/// See the docs for [`HighlightState`] for more in-depth discussion of caching.
///
/// This state doesn't keep track of the current scope stack and parsing only returns changes to this stack
/// so if you want to construct scope stacks you'll need to keep track of that as well.
/// Note that [`HighlightState`] contains exactly this as a public field that you can use.
///
/// **Note:** Caching is for advanced users who have tons of time to maximize performance or want to do so eventually.
/// It is not recommended that you try caching the first time you implement highlighting.
///
/// [`HighlightState`]: ../highlighting/struct.HighlightState.html
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ParseState {
    stack: Vec<StateLevel>,
    first_line: bool,
    // See issue #101. Contains indices of frames pushed by `with_prototype`s.
    // Doesn't look at `with_prototype`s below top of stack.
    proto_starts: Vec<usize>,
}

#[derive(Debug, Clone, Eq, PartialEq)]
struct StateLevel {
    context: ContextId,
    prototypes: Vec<ContextId>,
    captures: Option<(Region, String)>,
}

#[derive(Debug)]
struct RegexMatch<'a> {
    regions: Region,
    context: &'a Context,
    pat_index: usize,
    from_with_prototype: bool,
    would_loop: bool,
}

/// Maps the pattern to the start index, which is -1 if not found.
type SearchCache = HashMap<*const MatchPattern, Option<Region>, BuildHasherDefault<FnvHasher>>;

// To understand the implementation of this, here's an introduction to how
// Sublime Text syntax definitions work.
//
// Let's say we have the following made-up syntax definition:
//
//     contexts:
//       main:
//         - match: A
//           scope: scope.a.first
//           push: context-a
//         - match: b
//           scope: scope.b
//         - match: \w+
//           scope: scope.other
//       context-a:
//         - match: a+
//           scope: scope.a.rest
//         - match: (?=.)
//           pop: true
//
// There are two contexts, `main` and `context-a`. Each context contains a list
// of match rules with instructions for how to proceed.
//
// Let's say we have the input string " Aaaabxxx". We start at position 0 in
// the string. We keep a stack of contexts, which at the beginning is just main.
//
// So we start by looking at the top of the context stack (main), and look at
// the rules in order. The rule that wins is the first one that matches
// "earliest" in the input string. In our example:
//
// 1. The first one matches "A". Note that matches are not anchored, so this
//    matches at position 1.
// 2. The second one matches "b", so position 5. The first rule is winning.
// 3. The third one matches "\w+", so also position 1. But because the first
//    rule comes first, it wins.
//
// So now we execute the winning rule. Whenever we matched some text, we assign
// the scope (if there is one) to the matched text and advance our position to
// after the matched text. The scope is "scope.a.first" and our new position is
// after the "A", so 2. The "push" means that we should change our stack by
// pushing `context-a` on top of it.
//
// In the next step, we repeat the above, but now with the rules in `context-a`.
// The result is that we match "a+" and assign "scope.a.rest" to "aaa", and our
// new position is now after the "aaa". Note that there was no instruction for
// changing the stack, so we stay in that context.
//
// In the next step, the first rule doesn't match anymore, so we go to the next
// rule where "(?=.)" matches. The instruction is to "pop", which means we
// pop the top of our context stack, which means we're now back in main.
//
// This time in main, we match "b", and in the next step we match the rest with
// "\w+", and we're done.
//
//
// ## Preventing loops
//
// These are the basics of how matching works. Now, you saw that you can write
// patterns that result in an empty match and don't change the position. These
// are called non-consuming matches. The problem with them is that they could
// result in infinite loops. Let's look at a syntax where that is the case:
//
//     contexts:
//       main:
//         - match: (?=.)
//           push: test
//       test:
//         - match: \w+
//           scope: word
//         - match: (?=.)
//           pop: true
//
// This is a bit silly, but it's a minimal example for explaining how matching
// works in that case.
//
// Let's say we have the input string " hello". In `main`, our rule matches and
// we go into `test` and stay at position 0. Now, the best match is the rule
// with "pop". But if we used that rule, we'd pop back to `main` and would still
// be at the same position we started at! So this would be an infinite loop,
// which we don't want.
//
// So what Sublime Text does in case a looping rule "won":
//
// * If there's another rule that matches at the same position and does not
//   result in a loop, use that instead.
// * Otherwise, go to the next position and go through all the rules in the
//   current context again. Note that it means that the "pop" could again be the
//   winning rule, but that's ok as it wouldn't result in a loop anymore.
//
// So in our input string, we'd skip one character and try to match the rules
// again. This time, the "\w+" wins because it comes first.

impl ParseState {
    /// Creates a state from a syntax definition, keeping its own reference-counted point to the
    /// main context of the syntax
    pub fn new(syntax: &SyntaxReference) -> ParseState {
        let start_state = StateLevel {
            context: syntax.context_ids()["__start"],
            prototypes: Vec::new(),
            captures: None,
        };
        ParseState {
            stack: vec![start_state],
            first_line: true,
            proto_starts: Vec::new(),
        }
    }

    /// Parses a single line of the file. Because of the way regex engines work you unfortunately
    /// have to pass in a single line contiguous in memory. This can be bad for really long lines.
    /// Sublime Text avoids this by just not highlighting lines that are too long (thousands of characters).
    ///
    /// For efficiency reasons this returns only the changes to the current scope at each point in the line.
    /// You can use [`ScopeStack::apply`] on each operation in succession to get the stack for a given point.
    /// Look at the code in `highlighter.rs` for an example of doing this for highlighting purposes.
    ///
    /// The returned vector is in order both by index to apply at (the `usize`) and also by order to apply them at a
    /// given index (e.g popping old scopes before pushing new scopes).
    ///
    /// The [`SyntaxSet`] has to be the one that contained the syntax that was used to construct
    /// this [`ParseState`], or an extended version of it. Otherwise the parsing would return the
    /// wrong result or even panic. The reason for this is that contexts within the [`SyntaxSet`]
    /// are referenced via indexes.
    ///
    /// [`ScopeStack::apply`]: struct.ScopeStack.html#method.apply
    /// [`SyntaxSet`]: struct.SyntaxSet.html
    /// [`ParseState`]: struct.ParseState.html
    pub fn parse_line(
        &mut self,
        line: &str,
        syntax_set: &SyntaxSet,
    ) -> Result<Vec<(usize, ScopeStackOp)>, ParsingError> {
        if self.stack.is_empty() {
            return Err(ParsingError::MissingMainContext)
        }
        let mut match_start = 0;
        let mut res = Vec::new();

        if self.first_line {
            let cur_level = &self.stack[self.stack.len() - 1];
            let context = syntax_set.get_context(&cur_level.context)?;
            if !context.meta_content_scope.is_empty() {
                res.push((0, ScopeStackOp::Push(context.meta_content_scope[0])));
            }
            self.first_line = false;
        }

        let mut regions = Region::new();
        let fnv = BuildHasherDefault::<FnvHasher>::default();
        let mut search_cache: SearchCache = HashMap::with_capacity_and_hasher(128, fnv);
        // Used for detecting loops with push/pop, see long comment above.
        let mut non_consuming_push_at = (0, 0);

        while self.parse_next_token(
            line,
            syntax_set,
            &mut match_start,
            &mut search_cache,
            &mut regions,
            &mut non_consuming_push_at,
            &mut res
        )? {}

        Ok(res)
    }

    #[allow(clippy::too_many_arguments)]
    fn parse_next_token(
        &mut self,
        line: &str,
        syntax_set: &SyntaxSet,
        start: &mut usize,
        search_cache: &mut SearchCache,
        regions: &mut Region,
        non_consuming_push_at: &mut (usize, usize),
        ops: &mut Vec<(usize, ScopeStackOp)>,
    ) -> Result<bool, ParsingError> {
        let check_pop_loop = {
            let (pos, stack_depth) = *non_consuming_push_at;
            pos == *start && stack_depth == self.stack.len()
        };

        // Trim proto_starts that are no longer valid
        while self.proto_starts.last().map(|start| *start >= self.stack.len()).unwrap_or(false) {
            self.proto_starts.pop();
        }

        let best_match = self.find_best_match(line, *start, syntax_set, search_cache, regions, check_pop_loop)?;

        if let Some(reg_match) = best_match {
            if reg_match.would_loop {
                // A push that doesn't consume anything (a regex that resulted
                // in an empty match at the current position) can not be
                // followed by a non-consuming pop. Otherwise we're back where
                // we started and would try the same sequence of matches again,
                // resulting in an infinite loop. In this case, Sublime Text
                // advances one character and tries again, thus preventing the
                // loop.

                // println!("pop_would_loop for match {:?}, start {}", reg_match, *start);

                // nth(1) gets the next character if there is one. Need to do
                // this instead of just += 1 because we have byte indices and
                // unicode characters can be more than 1 byte.
                if let Some((i, _)) = line[*start..].char_indices().nth(1) {
                    *start += i;
                    return Ok(true);
                } else {
                    // End of line, no character to advance and no point trying
                    // any more patterns.
                    return Ok(false);
                }
            }

            let match_end = reg_match.regions.pos(0).unwrap().1;

            let consuming = match_end > *start;
            if !consuming {
                // The match doesn't consume any characters. If this is a
                // "push", remember the position and stack size so that we can
                // check the next "pop" for loops. Otherwise leave the state,
                // e.g. non-consuming "set" could also result in a loop.
                let context = reg_match.context;
                let match_pattern = context.match_at(reg_match.pat_index)?;
                if let MatchOperation::Push(_) = match_pattern.operation {
                    *non_consuming_push_at = (match_end, self.stack.len() + 1);
                }
            }

            *start = match_end;

            // ignore `with_prototype`s below this if a context is pushed
            if reg_match.from_with_prototype {
                // use current height, since we're before the actual push
                self.proto_starts.push(self.stack.len());
            }

            let level_context = {
                let id = &self.stack[self.stack.len() - 1].context;
                syntax_set.get_context(id)?
            };
            self.exec_pattern(line, &reg_match, level_context, syntax_set, ops)?;

            Ok(true)
        } else {
            Ok(false)
        }
    }

    fn find_best_match<'a>(
        &self,
        line: &str,
        start: usize,
        syntax_set: &'a SyntaxSet,
        search_cache: &mut SearchCache,
        regions: &mut Region,
        check_pop_loop: bool,
    ) -> Result<Option<RegexMatch<'a>>, ParsingError> {
        let cur_level = &self.stack[self.stack.len() - 1];
        let context = syntax_set.get_context(&cur_level.context)?;
        let prototype = if let Some(ref p) = context.prototype {
            Some(p)
        } else {
            None
        };

        // Build an iterator for the contexts we want to visit in order
        let context_chain = {
            let proto_start = self.proto_starts.last().cloned().unwrap_or(0);
            // Sublime applies with_prototypes from bottom to top
            let with_prototypes = self.stack[proto_start..].iter().flat_map(|lvl| lvl.prototypes.iter().map(move |ctx| (true, ctx, lvl.captures.as_ref())));
            let cur_prototype = prototype.into_iter().map(|ctx| (false, ctx, None));
            let cur_context = Some((false, &cur_level.context, cur_level.captures.as_ref())).into_iter();
            with_prototypes.chain(cur_prototype).chain(cur_context)
        };

        // println!("{:#?}", cur_level);
        // println!("token at {} on {}", start, line.trim_right());

        let mut min_start = usize::MAX;
        let mut best_match: Option<RegexMatch<'_>> = None;
        let mut pop_would_loop = false;

        for (from_with_proto, ctx, captures) in context_chain {
            for (pat_context, pat_index) in context_iter(syntax_set, syntax_set.get_context(ctx)?) {
                let match_pat = pat_context.match_at(pat_index)?;

                if let Some(match_region) = self.search(
                    line, start, match_pat, captures, search_cache, regions
                ) {
                    let (match_start, match_end) = match_region.pos(0).unwrap();

                    // println!("matched pattern {:?} at start {} end {}", match_pat.regex_str, match_start, match_end);

                    if match_start < min_start || (match_start == min_start && pop_would_loop) {
                        // New match is earlier in text than old match,
                        // or old match was a looping pop at the same
                        // position.

                        // println!("setting as current match");

                        min_start = match_start;

                        let consuming = match_end > start;
                        pop_would_loop = check_pop_loop
                            && !consuming
                            && matches!(match_pat.operation, MatchOperation::Pop);

                        best_match = Some(RegexMatch {
                            regions: match_region,
                            context: pat_context,
                            pat_index,
                            from_with_prototype: from_with_proto,
                            would_loop: pop_would_loop,
                        });

                        if match_start == start && !pop_would_loop {
                            // We're not gonna find a better match after this,
                            // so as an optimization we can stop matching now.
                            return Ok(best_match);
                        }
                    }
                }
            }
        }
        Ok(best_match)
    }

    fn search(&self,
              line: &str,
              start: usize,
              match_pat: &MatchPattern,
              captures: Option<&(Region, String)>,
              search_cache: &mut SearchCache,
              regions: &mut Region,
    ) -> Option<Region> {
        // println!("{} - {:?} - {:?}", match_pat.regex_str, match_pat.has_captures, cur_level.captures.is_some());
        let match_ptr = match_pat as *const MatchPattern;

        if let Some(maybe_region) = search_cache.get(&match_ptr) {
            if let Some(ref region) = *maybe_region {
                let match_start = region.pos(0).unwrap().0;
                if match_start >= start {
                    // Cached match is valid, return it. Otherwise do another
                    // search below.
                    return Some(region.clone());
                }
            } else {
                // Didn't find a match earlier, so no point trying to match it again
                return None;
            }
        }

        let (matched, can_cache) = match (match_pat.has_captures, captures) {
            (true, Some(captures)) => {
                let &(ref region, ref s) = captures;
                let regex = match_pat.regex_with_refs(region, s);
                let matched = regex.search(line, start, line.len(), Some(regions));
                (matched, false)
            }
            _ => {
                let regex = match_pat.regex();
                let matched = regex.search(line, start, line.len(), Some(regions));
                (matched, true)
            }
        };

        if matched {
            let (match_start, match_end) = regions.pos(0).unwrap();
            // this is necessary to avoid infinite looping on dumb patterns
            let does_something = match match_pat.operation {
                MatchOperation::None => match_start != match_end,
                _ => true,
            };
            if can_cache && does_something {
                search_cache.insert(match_pat, Some(regions.clone()));
            }
            if does_something {
                // print!("catch {} at {} on {}", match_pat.regex_str, match_start, line);
                return Some(regions.clone());
            }
        } else if can_cache {
            search_cache.insert(match_pat, None);
        }
        None
    }

    /// Returns true if the stack was changed
    fn exec_pattern<'a>(
        &mut self,
        line: &str,
        reg_match: &RegexMatch<'a>,
        level_context: &'a Context,
        syntax_set: &'a SyntaxSet,
        ops: &mut Vec<(usize, ScopeStackOp)>,
    ) -> Result<bool, ParsingError> {
        let (match_start, match_end) = reg_match.regions.pos(0).unwrap();
        let context = reg_match.context;
        let pat = context.match_at(reg_match.pat_index)?;
        // println!("running pattern {:?} on '{}' at {}, operation {:?}", pat.regex_str, line, match_start, pat.operation);

        self.push_meta_ops(true, match_start, level_context, &pat.operation, syntax_set, ops)?;
        for s in &pat.scope {
            // println!("pushing {:?} at {}", s, match_start);
            ops.push((match_start, ScopeStackOp::Push(*s)));
        }
        if let Some(ref capture_map) = pat.captures {
            // captures could appear in an arbitrary order, have to produce ops in right order
            // ex: ((bob)|(hi))* could match hibob in wrong order, and outer has to push first
            // we don't have to handle a capture matching multiple times, Sublime doesn't
            let mut map: Vec<((usize, i32), ScopeStackOp)> = Vec::new();
            for &(cap_index, ref scopes) in capture_map.iter() {
                if let Some((cap_start, cap_end)) = reg_match.regions.pos(cap_index) {
                    // marking up empty captures causes pops to be sorted wrong
                    if cap_start == cap_end {
                        continue;
                    }
                    // println!("capture {:?} at {:?}-{:?}", scopes[0], cap_start, cap_end);
                    for scope in scopes.iter() {
                        map.push(((cap_start, -((cap_end - cap_start) as i32)),
                                  ScopeStackOp::Push(*scope)));
                    }
                    map.push(((cap_end, i32::MIN), ScopeStackOp::Pop(scopes.len())));
                }
            }
            map.sort_by(|a, b| a.0.cmp(&b.0));
            for ((index, _), op) in map.into_iter() {
                ops.push((index, op));
            }
        }
        if !pat.scope.is_empty() {
            // println!("popping at {}", match_end);
            ops.push((match_end, ScopeStackOp::Pop(pat.scope.len())));
        }
        self.push_meta_ops(false, match_end, &*level_context, &pat.operation, syntax_set, ops)?;

        self.perform_op(line, &reg_match.regions, pat, syntax_set)
    }

    fn push_meta_ops<'a>(
        &self,
        initial: bool,
        index: usize,
        cur_context: &Context,
        match_op: &MatchOperation,
        syntax_set: &'a SyntaxSet,
        ops: &mut Vec<(usize, ScopeStackOp)>,
    ) -> Result<(), ParsingError>{
        // println!("metas ops for {:?}, initial: {}",
        //          match_op,
        //          initial);
        // println!("{:?}", cur_context.meta_scope);
        match *match_op {
            MatchOperation::Pop => {
                let v = if initial {
                    &cur_context.meta_content_scope
                } else {
                    &cur_context.meta_scope
                };
                if !v.is_empty() {
                    ops.push((index, ScopeStackOp::Pop(v.len())));
                }

                // cleared scopes are restored after the scopes from match pattern that invoked the pop are applied
                if !initial && cur_context.clear_scopes != None {
                    ops.push((index, ScopeStackOp::Restore))
                }
            },
            // for some reason the ST3 behaviour of set is convoluted and is inconsistent with the docs and other ops
            // - the meta_content_scope of the current context is applied to the matched thing, unlike pop
            // - the clear_scopes are applied after the matched token, unlike push
            // - the interaction with meta scopes means that the token has the meta scopes of both the current scope and the new scope.
            MatchOperation::Push(ref context_refs) |
            MatchOperation::Set(ref context_refs) => {
                let is_set = matches!(*match_op, MatchOperation::Set(_));
                // a match pattern that "set"s keeps the meta_content_scope and meta_scope from the previous context
                if initial {
                    if is_set && cur_context.clear_scopes != None {
                        // cleared scopes from the old context are restored immediately
                        ops.push((index, ScopeStackOp::Restore));
                    }
                    // add each context's meta scope
                    for r in context_refs.iter() {
                        let ctx = r.resolve(syntax_set)?;

                        if !is_set {
                            if let Some(clear_amount) = ctx.clear_scopes {
                                ops.push((index, ScopeStackOp::Clear(clear_amount)));
                            }
                        }

                        for scope in ctx.meta_scope.iter() {
                            ops.push((index, ScopeStackOp::Push(*scope)));
                        }
                    }
                } else {
                    let repush = (is_set && (!cur_context.meta_scope.is_empty() || !cur_context.meta_content_scope.is_empty())) || context_refs.iter().any(|r| {
                        let ctx = r.resolve(syntax_set).unwrap();

                        !ctx.meta_content_scope.is_empty() || (ctx.clear_scopes.is_some() && is_set)
                    });
                    if repush {
                        // remove previously pushed meta scopes, so that meta content scopes will be applied in the correct order
                        let mut num_to_pop : usize = context_refs.iter().map(|r| {
                            let ctx = r.resolve(syntax_set).unwrap();
                            ctx.meta_scope.len()
                        }).sum();

                        // also pop off the original context's meta scopes
                        if is_set {
                            num_to_pop += cur_context.meta_content_scope.len() + cur_context.meta_scope.len();
                        }

                        // do all the popping as one operation
                        if num_to_pop > 0 {
                            ops.push((index, ScopeStackOp::Pop(num_to_pop)));
                        }

                        // now we push meta scope and meta context scope for each context pushed
                        for r in context_refs {
                            let ctx = r.resolve(syntax_set)?;

                            // for some reason, contrary to my reading of the docs, set does this after the token
                            if is_set {
                                if let Some(clear_amount) = ctx.clear_scopes {
                                    ops.push((index, ScopeStackOp::Clear(clear_amount)));
                                }
                            }

                            for scope in ctx.meta_scope.iter() {
                                ops.push((index, ScopeStackOp::Push(*scope)));
                            }
                            for scope in ctx.meta_content_scope.iter() {
                                ops.push((index, ScopeStackOp::Push(*scope)));
                            }
                        }
                    }
                }
            },
            MatchOperation::None => (),
        }

        Ok(())
    }

    /// Returns true if the stack was changed
    fn perform_op(
        &mut self,
        line: &str,
        regions: &Region,
        pat: &MatchPattern,
        syntax_set: &SyntaxSet
    ) -> Result<bool, ParsingError> {
        let (ctx_refs, old_proto_ids) = match pat.operation {
            MatchOperation::Push(ref ctx_refs) => (ctx_refs, None),
            MatchOperation::Set(ref ctx_refs) => {
                // a `with_prototype` stays active when the context is `set`
                // until the context layer in the stack (where the `with_prototype`
                // was initially applied) is popped off.
                (ctx_refs, self.stack.pop().map(|s| s.prototypes))
            }
            MatchOperation::Pop => {
                self.stack.pop();
                return Ok(true);
            }
            MatchOperation::None => return Ok(false),
        };
        for (i, r) in ctx_refs.iter().enumerate() {
            let mut proto_ids = if i == 0 {
                // it is only necessary to preserve the old prototypes
                // at the first stack frame pushed
                old_proto_ids.clone().unwrap_or_else(Vec::new)
            } else {
                Vec::new()
            };
            if i == ctx_refs.len() - 1 {
                // if a with_prototype was specified, and multiple contexts were pushed,
                // then the with_prototype applies only to the last context pushed, i.e.
                // top most on the stack after all the contexts are pushed - this is also
                // referred to as the "target" of the push by sublimehq - see
                // https://forum.sublimetext.com/t/dev-build-3111/19240/17 for more info
                if let Some(ref p) = pat.with_prototype {
                    proto_ids.push(p.id()?);
                }
            }
            let context_id = r.id()?;
            let context = syntax_set.get_context(&context_id)?;
            let captures = {
                let mut uses_backrefs = context.uses_backrefs;
                if !proto_ids.is_empty() {
                    uses_backrefs = uses_backrefs || proto_ids.iter().any(|id| syntax_set.get_context(id).unwrap().uses_backrefs);
                }
                if uses_backrefs {
                    Some((regions.clone(), line.to_owned()))
                } else {
                    None
                }
            };
            self.stack.push(StateLevel {
                context: context_id,
                prototypes: proto_ids,
                captures,
            });
        }
        Ok(true)
    }
}

#[cfg(feature = "yaml-load")]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsing::{SyntaxSet, SyntaxSetBuilder, Scope, ScopeStack};
    use crate::parsing::ScopeStackOp::{Push, Pop, Clear, Restore};
    use crate::util::debug_print_ops;

    const TEST_SYNTAX: &str = include_str!("../../testdata/parser_tests.sublime-syntax");

    #[test]
    fn can_parse_simple() {
        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
        let mut state = {
            let syntax = ss.find_syntax_by_name("Ruby on Rails").unwrap();
            ParseState::new(syntax)
        };

        let ops1 = ops(&mut state, "module Bob::Wow::Troll::Five; 5; end", &ss);
        let test_ops1 = vec![
            (0, Push(Scope::new("source.ruby.rails").unwrap())),
            (0, Push(Scope::new("meta.module.ruby").unwrap())),
            (0, Push(Scope::new("keyword.control.module.ruby").unwrap())),
            (6, Pop(2)),
            (6, Push(Scope::new("meta.module.ruby").unwrap())),
            (7, Pop(1)),
            (7, Push(Scope::new("meta.module.ruby").unwrap())),
            (7, Push(Scope::new("entity.name.module.ruby").unwrap())),
            (7, Push(Scope::new("support.other.namespace.ruby").unwrap())),
            (10, Pop(1)),
            (10, Push(Scope::new("punctuation.accessor.ruby").unwrap())),
        ];
        assert_eq!(&ops1[0..test_ops1.len()], &test_ops1[..]);

        let ops2 = ops(&mut state, "def lol(wow = 5)", &ss);
        let test_ops2 = vec![
            (0, Push(Scope::new("meta.function.ruby").unwrap())),
            (0, Push(Scope::new("keyword.control.def.ruby").unwrap())),
            (3, Pop(2)),
            (3, Push(Scope::new("meta.function.ruby").unwrap())),
            (4, Push(Scope::new("entity.name.function.ruby").unwrap())),
            (7, Pop(1))
        ];
        assert_eq!(&ops2[0..test_ops2.len()], &test_ops2[..]);
    }

    #[test]
    fn can_parse_yaml() {
        let ps = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
        let mut state = {
            let syntax = ps.find_syntax_by_name("YAML").unwrap();
            ParseState::new(syntax)
        };

        assert_eq!(ops(&mut state, "key: value\n", &ps), vec![
            (0, Push(Scope::new("source.yaml").unwrap())),
            (0, Push(Scope::new("string.unquoted.plain.out.yaml").unwrap())),
            (0, Push(Scope::new("entity.name.tag.yaml").unwrap())),
            (3, Pop(2)),
            (3, Push(Scope::new("punctuation.separator.key-value.mapping.yaml").unwrap())),
            (4, Pop(1)),
            (5, Push(Scope::new("string.unquoted.plain.out.yaml").unwrap())),
            (10, Pop(1)),
        ]);
    }

    #[test]
    fn can_parse_includes() {
        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
        let mut state = {
            let syntax = ss.find_syntax_by_name("HTML (Rails)").unwrap();
            ParseState::new(syntax)
        };

        let ops = ops(&mut state, "<script>var lol = '<% def wow(", &ss);

        let mut test_stack = ScopeStack::new();
        test_stack.push(Scope::new("text.html.ruby").unwrap());
        test_stack.push(Scope::new("text.html.basic").unwrap());
        test_stack.push(Scope::new("source.js.embedded.html").unwrap());
        test_stack.push(Scope::new("source.js").unwrap());
        test_stack.push(Scope::new("string.quoted.single.js").unwrap());
        test_stack.push(Scope::new("source.ruby.rails.embedded.html").unwrap());
        test_stack.push(Scope::new("meta.function.parameters.ruby").unwrap());

        let mut stack = ScopeStack::new();
        for &(_, ref op) in ops.iter() {
            stack.apply(op).expect("#[cfg(test)]");
        }
        assert_eq!(stack, test_stack);
    }

    #[test]
    fn can_parse_backrefs() {
        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
        let mut state = {
            let syntax = ss.find_syntax_by_name("Ruby on Rails").unwrap();
            ParseState::new(syntax)
        };

        // For parsing HEREDOC, the "SQL" is captured at the beginning and then used in another
        // regex with a backref, to match the end of the HEREDOC. Note that there can be code
        // after the marker (`.strip`) here.
        assert_eq!(ops(&mut state, "lol = <<-SQL.strip", &ss), vec![
            (0, Push(Scope::new("source.ruby.rails").unwrap())),
            (4, Push(Scope::new("keyword.operator.assignment.ruby").unwrap())),
            (5, Pop(1)),
            (6, Push(Scope::new("string.unquoted.embedded.sql.ruby").unwrap())),
            (6, Push(Scope::new("punctuation.definition.string.begin.ruby").unwrap())),
            (12, Pop(1)),
            (12, Pop(1)),
            (12, Push(Scope::new("string.unquoted.embedded.sql.ruby").unwrap())),
            (12, Push(Scope::new("text.sql.embedded.ruby").unwrap())),
            (12, Clear(ClearAmount::TopN(2))),
            (12, Push(Scope::new("punctuation.accessor.ruby").unwrap())),
            (13, Pop(1)),
            (18, Restore),
        ]);

        assert_eq!(ops(&mut state, "wow", &ss), vec![]);

        assert_eq!(ops(&mut state, "SQL", &ss), vec![
            (0, Pop(1)),
            (0, Push(Scope::new("punctuation.definition.string.end.ruby").unwrap())),
            (3, Pop(1)),
            (3, Pop(1)),
        ]);
    }

    #[test]
    fn can_parse_preprocessor_rules() {
        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
        let mut state = {
            let syntax = ss.find_syntax_by_name("C").unwrap();
            ParseState::new(syntax)
        };

        assert_eq!(ops(&mut state, "#ifdef FOO", &ss), vec![
            (0, Push(Scope::new("source.c").unwrap())),
            (0, Push(Scope::new("meta.preprocessor.c").unwrap())),
            (0, Push(Scope::new("keyword.control.import.c").unwrap())),
            (6, Pop(1)),
            (10, Pop(1)),
        ]);
        assert_eq!(ops(&mut state, "{", &ss), vec![
            (0, Push(Scope::new("meta.block.c").unwrap())),
            (0, Push(Scope::new("punctuation.section.block.begin.c").unwrap())),
            (1, Pop(1)),
        ]);
        assert_eq!(ops(&mut state, "#else", &ss), vec![
            (0, Push(Scope::new("meta.preprocessor.c").unwrap())),
            (0, Push(Scope::new("keyword.control.import.c").unwrap())),
            (5, Pop(1)),
            (5, Pop(1)),
        ]);
        assert_eq!(ops(&mut state, "{", &ss), vec![
            (0, Push(Scope::new("meta.block.c").unwrap())),
            (0, Push(Scope::new("punctuation.section.block.begin.c").unwrap())),
            (1, Pop(1)),
        ]);
        assert_eq!(ops(&mut state, "#endif", &ss), vec![
            (0, Pop(1)),
            (0, Push(Scope::new("meta.block.c").unwrap())),
            (0, Push(Scope::new("meta.preprocessor.c").unwrap())),
            (0, Push(Scope::new("keyword.control.import.c").unwrap())),
            (6, Pop(2)),
            (6, Pop(2)),
            (6, Push(Scope::new("meta.block.c").unwrap())),
        ]);
        assert_eq!(ops(&mut state, "    foo;", &ss), vec![
            (7, Push(Scope::new("punctuation.terminator.c").unwrap())),
            (8, Pop(1)),
        ]);
        assert_eq!(ops(&mut state, "}", &ss), vec![
            (0, Push(Scope::new("punctuation.section.block.end.c").unwrap())),
            (1, Pop(1)),
            (1, Pop(1)),
        ]);
    }

    #[test]
    fn can_parse_issue25() {
        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
        let mut state = {
            let syntax = ss.find_syntax_by_name("C").unwrap();
            ParseState::new(syntax)
        };

        // test fix for issue #25
        assert_eq!(ops(&mut state, "struct{estruct", &ss).len(), 10);
    }

    #[test]
    fn can_compare_parse_states() {
        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
        let syntax = ss.find_syntax_by_name("Java").unwrap();
        let mut state1 = ParseState::new(syntax);
        let mut state2 = ParseState::new(syntax);

        assert_eq!(ops(&mut state1, "class Foo {", &ss).len(), 11);
        assert_eq!(ops(&mut state2, "class Fooo {", &ss).len(), 11);

        assert_eq!(state1, state2);
        ops(&mut state1, "}", &ss);
        assert_ne!(state1, state2);
    }

    #[test]
    fn can_parse_non_nested_clear_scopes() {
        let line = "'hello #simple_cleared_scopes_test world test \\n '";
        let expect = [
            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
            "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
        ];
        expect_scope_stacks(line, &expect, TEST_SYNTAX);
    }

    #[test]
    fn can_parse_non_nested_too_many_clear_scopes() {
        let line = "'hello #too_many_cleared_scopes_test world test \\n '";
        let expect = [
            "<example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
            "<example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
            "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
        ];
        expect_scope_stacks(line, &expect, TEST_SYNTAX);
    }

    #[test]
    fn can_parse_nested_clear_scopes() {
        let line = "'hello #nested_clear_scopes_test world foo bar test \\n '";
        let expect = [
            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
            "<source.test>, <example.meta-scope.cleared-previous-meta-scope.example>, <foo>",
            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
            "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
        ];
        expect_scope_stacks(line, &expect, TEST_SYNTAX);
    }

    #[test]
    fn can_parse_infinite_loop() {
        let line = "#infinite_loop_test 123";
        let expect = [
            "<source.test>, <constant.numeric.test>",
        ];
        expect_scope_stacks(line, &expect, TEST_SYNTAX);
    }

    #[test]
    fn can_parse_infinite_seeming_loop() {
        // See https://github.com/SublimeTextIssues/Core/issues/1190 for an
        // explanation.
        let line = "#infinite_seeming_loop_test hello";
        let expect = [
            "<source.test>, <keyword.test>",
            "<source.test>, <test>, <string.unquoted.test>",
            "<source.test>, <test>, <keyword.control.test>",
        ];
        expect_scope_stacks(line, &expect, TEST_SYNTAX);
    }

    #[test]
    fn can_parse_prototype_that_pops_main() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  prototype:
    # This causes us to pop out of the main context. Sublime Text handles that
    # by pushing main back automatically.
    - match: (?=!)
      pop: true
  main:
    - match: foo
      scope: test.good
"#;

        let line = "foo!";
        let expect = ["<source.test>, <test.good>"];
        expect_scope_stacks(line, &expect, syntax);
    }

    #[test]
    fn can_parse_syntax_with_newline_in_character_class() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    - match: foo[\n]
      scope: foo.end
    - match: foo
      scope: foo.any
"#;

        let line = "foo";
        let expect = ["<source.test>, <foo.end>"];
        expect_scope_stacks(line, &expect, syntax);

        let line = "foofoofoo";
        let expect = [
            "<source.test>, <foo.any>",
            "<source.test>, <foo.any>",
            "<source.test>, <foo.end>",
        ];
        expect_scope_stacks(line, &expect, syntax);
    }

    #[test]
    fn can_parse_issue120() {
        let syntax = SyntaxDefinition::load_from_str(
            include_str!("../../testdata/embed_escape_test.sublime-syntax"),
            false,
            None
        ).unwrap();

        let line1 = "\"abctest\" foobar";
        let expect1 = [
            "<meta.attribute-with-value.style.html>, <string.quoted.double>, <punctuation.definition.string.begin.html>",
            "<meta.attribute-with-value.style.html>, <source.css>",
            "<meta.attribute-with-value.style.html>, <string.quoted.double>, <punctuation.definition.string.end.html>",
            "<meta.attribute-with-value.style.html>, <source.css>, <test.embedded>",
            "<top-level.test>",
        ];

        expect_scope_stacks_with_syntax(line1, &expect1, syntax.clone());

        let line2 = ">abctest</style>foobar";
        let expect2 = [
            "<meta.tag.style.begin.html>, <punctuation.definition.tag.end.html>",
            "<source.css.embedded.html>, <test.embedded>",
            "<top-level.test>",
        ];
        expect_scope_stacks_with_syntax(line2, &expect2, syntax);
    }

    #[test]
    fn can_parse_non_consuming_pop_that_would_loop() {
        // See https://github.com/trishume/syntect/issues/127
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    # This makes us go into "test" without consuming any characters
    - match: (?=hello)
      push: test
  test:
    # If we used this match, we'd go back to "main" without consuming anything,
    # and then back into "test", infinitely looping. ST detects this at this
    # point and ignores this match until at least one character matched.
    - match: (?!world)
      pop: true
    - match: \w+
      scope: test.matched
"#;

        let line = "hello";
        let expect = ["<source.test>, <test.matched>"];
        expect_scope_stacks(line, &expect, syntax);
    }

    #[test]
    fn can_parse_non_consuming_set_and_pop_that_would_loop() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    # This makes us go into "a" without advancing
    - match: (?=test)
      push: a
  a:
    # This makes us go into "b" without advancing
    - match: (?=t)
      set: b
  b:
    # If we used this match, we'd go back to "main" without having advanced,
    # which means we'd have an infinite loop like with the previous test.
    # So even for a "set", we have to check if we're advancing or not.
    - match: (?=t)
      pop: true
    - match: \w+
      scope: test.matched
"#;

        let line = "test";
        let expect = ["<source.test>, <test.matched>"];
        expect_scope_stacks(line, &expect, syntax);
    }

    #[test]
    fn can_parse_non_consuming_set_after_consuming_push_that_does_not_loop() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    # This makes us go into "a", but we consumed a character
    - match: t
      push: a
    - match: \w+
      scope: test.matched
  a:
    # This makes us go into "b" without consuming
    - match: (?=e)
      set: b
  b:
    # This match does not result in an infinite loop because we already consumed
    # a character to get into "a", so it's ok to pop back into "main".
    - match: (?=e)
      pop: true
"#;

        let line = "test";
        let expect = ["<source.test>, <test.matched>"];
        expect_scope_stacks(line, &expect, syntax);
    }

    #[test]
    fn can_parse_non_consuming_set_after_consuming_set_that_does_not_loop() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    - match: (?=hello)
      push: a
    - match: \w+
      scope: test.matched
  a:
    - match: h
      set: b
  b:
    - match: (?=e)
      set: c
  c:
    # This is not an infinite loop because "a" consumed a character, so we can
    # actually pop back into main and then match the rest of the input.
    - match: (?=e)
      pop: true
"#;

        let line = "hello";
        let expect = ["<source.test>, <test.matched>"];
        expect_scope_stacks(line, &expect, syntax);
    }

    #[test]
    fn can_parse_non_consuming_pop_that_would_loop_at_end_of_line() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    # This makes us go into "test" without consuming, even at the end of line
    - match: ""
      push: test
  test:
    - match: ""
      pop: true
    - match: \w+
      scope: test.matched
"#;

        let line = "hello";
        let expect = ["<source.test>, <test.matched>"];
        expect_scope_stacks(line, &expect, syntax);
    }

    #[test]
    fn can_parse_empty_but_consuming_set_that_does_not_loop() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    - match: (?=hello)
      push: a
    - match: ello
      scope: test.good
  a:
    # This is an empty match, but it consumed a character (the "h")
    - match: (?=e)
      set: b
  b:
    # .. so it's ok to pop back to main from here
    - match: ""
      pop: true
    - match: ello
      scope: test.bad
"#;

        let line = "hello";
        let expect = ["<source.test>, <test.good>"];
        expect_scope_stacks(line, &expect, syntax);
    }

    #[test]
    fn can_parse_non_consuming_pop_that_does_not_loop() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    # This is a non-consuming push, so "b" will need to check for a
    # non-consuming pop
    - match: (?=hello)
      push: [b, a]
    - match: ello
      scope: test.good
  a:
    # This pop is ok, it consumed "h"
    - match: (?=e)
      pop: true
  b:
    # This is non-consuming, and we set to "c"
    - match: (?=e)
      set: c
  c:
    # It's ok to pop back to "main" here because we consumed a character in the
    # meantime.
    - match: ""
      pop: true
    - match: ello
      scope: test.bad
"#;

        let line = "hello";
        let expect = ["<source.test>, <test.good>"];
        expect_scope_stacks(line, &expect, syntax);
    }

    #[test]
    fn can_parse_non_consuming_pop_with_multi_push_that_does_not_loop() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    - match: (?=hello)
      push: [b, a]
    - match: ello
      scope: test.good
  a:
    # This pop is ok, as we're not popping back to "main" yet (which would loop),
    # we're popping to "b"
    - match: ""
      pop: true
    - match: \w+
      scope: test.bad
  b:
    - match: \w+
      scope: test.good
"#;

        let line = "hello";
        let expect = ["<source.test>, <test.good>"];
        expect_scope_stacks(line, &expect, syntax);
    }

    #[test]
    fn can_parse_non_consuming_pop_of_recursive_context_that_does_not_loop() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    - match: xxx
      scope: test.good
    - include: basic-identifiers

  basic-identifiers:
    - match: '\w+::'
      scope: test.matched
      push: no-type-names

  no-type-names:
      - include: basic-identifiers
      - match: \w+
        scope: test.matched.inside
      # This is a tricky one because when this is the best match,
      # we have two instances of "no-type-names" on the stack, so we're popping
      # back from "no-type-names" to another "no-type-names".
      - match: ''
        pop: true
"#;

        let line = "foo::bar::* xxx";
        let expect = ["<source.test>, <test.good>"];
        expect_scope_stacks(line, &expect, syntax);
    }

    #[test]
    fn can_parse_non_consuming_pop_order() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    - match: (?=hello)
      push: test
  test:
    # This matches first
    - match: (?=e)
      push: good
    # But this (looping) match replaces it, because it's an earlier match
    - match: (?=h)
      pop: true
    # And this should not replace it, as it's a later match (only matches at
    # the same position can replace looping pops).
    - match: (?=o)
      push: bad
  good:
    - match: \w+
      scope: test.good
  bad:
    - match: \w+
      scope: test.bad
"#;

        let line = "hello";
        let expect = ["<source.test>, <test.good>"];
        expect_scope_stacks(line, &expect, syntax);
    }

    #[test]
    fn can_parse_prototype_with_embed() {
        let syntax = r#"
name: Javadoc
scope: text.html.javadoc
contexts:
  prototype:
    - match: \*
      scope: punctuation.definition.comment.javadoc

  main:
    - meta_include_prototype: false
    - match: /\*\*
      scope: comment.block.documentation.javadoc punctuation.definition.comment.begin.javadoc
      embed: contents
      embed_scope: comment.block.documentation.javadoc text.html.javadoc
      escape: \*/
      escape_captures:
        0: comment.block.documentation.javadoc punctuation.definition.comment.end.javadoc

  contents:
    - match: ''
"#;

        let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
        expect_scope_stacks_with_syntax("/** * */", &["<comment.block.documentation.javadoc>, <punctuation.definition.comment.begin.javadoc>", "<comment.block.documentation.javadoc>, <text.html.javadoc>, <punctuation.definition.comment.javadoc>", "<comment.block.documentation.javadoc>, <punctuation.definition.comment.end.javadoc>"], syntax);
    }

    #[test]
    fn can_parse_context_included_in_prototype_via_named_reference() {
        let syntax = r#"
scope: source.test
contexts:
  prototype:
    - match: a
      push: a
    - match: b
      scope: test.bad
  main:
    - match: unused
  # This context is included in the prototype (see `push: a`).
  # Because of that, ST doesn't apply the prototype to this context, so if
  # we're in here the "b" shouldn't match.
  a:
    - match: a
      scope: test.good
"#;

        let stack_states = stack_states(parse("aa b", syntax));
        assert_eq!(stack_states, vec![
            "<source.test>",
            "<source.test>, <test.good>",
            "<source.test>",
        ], "Expected test.bad to not match");
    }

    #[test]
    fn can_parse_with_prototype_set() {
        let syntax = r#"%YAML 1.2
---
scope: source.test-set-with-proto
contexts:
  main:
    - match: a
      scope: a
      set: next1
      with_prototype:
        - match: '1'
          scope: '1'
        - match: '2'
          scope: '2'
        - match: '3'
          scope: '3'
        - match: '4'
          scope: '4'
    - match: '5'
      scope: '5'
      set: [next3, next2]
      with_prototype:
        - match: c
          scope: cwith
  next1:
    - match: b
      scope: b
      set: next2
  next2:
    - match: c
      scope: c
      push: next3
    - match: e
      scope: e
      pop: true
    - match: f
      scope: f
      set: [next1, next2]
  next3:
    - match: d
      scope: d
    - match: (?=e)
      pop: true
    - match: c
      scope: cwithout
"#;

        expect_scope_stacks_with_syntax(
            "a1b2c3d4e5",
            &[
                "<a>", "<1>", "<b>", "<2>", "<c>", "<3>", "<d>", "<4>", "<e>", "<5>"
            ], SyntaxDefinition::load_from_str(syntax, true, None).unwrap()
        );
        expect_scope_stacks_with_syntax(
            "5cfcecbedcdea",
            &[
                "<5>", "<cwith>", "<f>", "<e>", "<b>", "<d>", "<cwithout>", "<a>"
            ], SyntaxDefinition::load_from_str(syntax, true, None).unwrap()
        );
    }

    #[test]
    fn can_parse_issue176() {
        let syntax = r#"
scope: source.dummy
contexts:
  main:
    - match: (test)(?=(foo))(f)
      captures:
        1: test
        2: ignored
        3: f
      push:
        - match: (oo)
          captures:
            1: keyword
"#;

        let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
        expect_scope_stacks_with_syntax("testfoo", &["<test>", /*"<ignored>",*/ "<f>", "<keyword>"], syntax);
    }

    #[test]
    fn can_parse_two_with_prototypes_at_same_stack_level() {
        let syntax_yamlstr = r#"
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
scope: source.example-wp
contexts:
  main:
    - match: a
      scope: a
      push:
        - match: b
          scope: b
          set:
            - match: c
              scope: c
          with_prototype:
            - match: '2'
              scope: '2'
      with_prototype:
        - match: '1'
          scope: '1'
"#;

        let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
        expect_scope_stacks_with_syntax("abc12", &["<1>", "<2>"], syntax);
    }

    #[test]
    fn can_parse_two_with_prototypes_at_same_stack_level_set_multiple() {
        let syntax_yamlstr = r#"
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
scope: source.example-wp
contexts:
  main:
    - match: a
      scope: a
      push:
        - match: b
          scope: b
          set: [context1, context2, context3]
          with_prototype:
            - match: '2'
              scope: '2'
      with_prototype:
        - match: '1'
          scope: '1'
    - match: '1'
      scope: digit1
    - match: '2'
      scope: digit2
  context1:
    - match: e
      scope: e
      pop: true
    - match: '2'
      scope: digit2
  context2:
    - match: d
      scope: d
      pop: true
    - match: '2'
      scope: digit2
  context3:
    - match: c
      scope: c
      pop: true
"#;

        let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
        expect_scope_stacks_with_syntax("ab12", &["<1>", "<2>"], syntax.clone());
        expect_scope_stacks_with_syntax("abc12", &["<1>", "<digit2>"], syntax.clone());
        expect_scope_stacks_with_syntax("abcd12", &["<1>", "<digit2>"], syntax.clone());
        expect_scope_stacks_with_syntax("abcde12", &["<digit1>", "<digit2>"], syntax);
    }

    #[test]
    fn can_parse_two_with_prototypes_at_same_stack_level_updated_captures() {
        let syntax_yamlstr = r#"
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
scope: source.example-wp
contexts:
  main:
    - match: (a)
      scope: a
      push:
        - match: (b)
          scope: b
          set:
            - match: c
              scope: c
          with_prototype:
            - match: d
              scope: d
      with_prototype:
        - match: \1
          scope: '1'
          pop: true
"#;

        let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
        expect_scope_stacks_with_syntax("aa", &["<a>", "<1>"], syntax.clone());
        expect_scope_stacks_with_syntax("abcdb", &["<a>", "<b>", "<c>", "<d>", "<1>"], syntax);
    }

    #[test]
    fn can_parse_two_with_prototypes_at_same_stack_level_updated_captures_ignore_unexisting() {
        let syntax_yamlstr = r#"
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
scope: source.example-wp
contexts:
  main:
    - match: (a)(-)
      scope: a
      push:
        - match: (b)
          scope: b
          set:
            - match: c
              scope: c
          with_prototype:
            - match: d
              scope: d
      with_prototype:
        - match: \2
          scope: '2'
          pop: true
        - match: \1
          scope: '1'
          pop: true
"#;

        let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
        expect_scope_stacks_with_syntax("a--", &["<a>", "<2>"], syntax.clone());
        // it seems that when ST encounters a non existing pop backreference, it just pops back to the with_prototype's original parent context - i.e. cdb is unscoped
        // TODO: it would be useful to have syntest functionality available here for easier testing and clarity
        expect_scope_stacks_with_syntax("a-bcdba-", &["<a>", "<b>"], syntax);
    }

    #[test]
    fn can_parse_syntax_with_eol_and_newline() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    - match: foo$\n
      scope: foo.newline
"#;

        let line = "foo";
        let expect = ["<source.test>, <foo.newline>"];
        expect_scope_stacks(line, &expect, syntax);
    }

    #[test]
    fn can_parse_syntax_with_eol_only() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    - match: foo$
      scope: foo.newline
"#;

        let line = "foo";
        let expect = ["<source.test>, <foo.newline>"];
        expect_scope_stacks(line, &expect, syntax);
    }

    #[test]
    fn can_parse_syntax_with_beginning_of_line() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    - match: \w+
      scope: word
      push:
        # this should not match at the end of the line
        - match: ^\s*$
          pop: true
        - match: =+
          scope: heading
          pop: true
    - match: .*
      scope: other
"#;

        let syntax_newlines = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
        let syntax_set = link(syntax_newlines);

        let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
        assert_eq!(ops(&mut state, "foo\n", &syntax_set), vec![
            (0, Push(Scope::new("source.test").unwrap())),
            (0, Push(Scope::new("word").unwrap())),
            (3, Pop(1))
        ]);
        assert_eq!(ops(&mut state, "===\n", &syntax_set), vec![
            (0, Push(Scope::new("heading").unwrap())),
            (3, Pop(1))
        ]);

        assert_eq!(ops(&mut state, "bar\n", &syntax_set), vec![
            (0, Push(Scope::new("word").unwrap())),
            (3, Pop(1))
        ]);
        // This should result in popping out of the context
        assert_eq!(ops(&mut state, "\n", &syntax_set), vec![]);
        // So now this matches other
        assert_eq!(ops(&mut state, "====\n", &syntax_set), vec![
            (0, Push(Scope::new("other").unwrap())),
            (4, Pop(1))
        ]);
    }

    #[test]
    fn can_parse_syntax_with_comment_and_eol() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    - match: (//).*$
      scope: comment.line.double-slash
"#;

        let syntax_newlines = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
        let syntax_set = link(syntax_newlines);

        let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
        assert_eq!(ops(&mut state, "// foo\n", &syntax_set), vec![
            (0, Push(Scope::new("source.test").unwrap())),
            (0, Push(Scope::new("comment.line.double-slash").unwrap())),
            // 6 is important here, should not be 7. The pattern should *not* consume the newline,
            // but instead match before it. This is important for whitespace-sensitive syntaxes
            // where newlines terminate statements such as Scala.
            (6, Pop(1))
        ]);
    }

    #[test]
    fn can_parse_text_with_unicode_to_skip() {
        let syntax = r#"
name: test
scope: source.test
contexts:
  main:
    - match: (?=.)
      push: test
  test:
    - match: (?=.)
      pop: true
    - match: x
      scope: test.good
"#;

        // U+03C0 GREEK SMALL LETTER PI, 2 bytes in UTF-8
        expect_scope_stacks("\u{03C0}x", &["<source.test>, <test.good>"], syntax);
        // U+0800 SAMARITAN LETTER ALAF, 3 bytes in UTF-8
        expect_scope_stacks("\u{0800}x", &["<source.test>, <test.good>"], syntax);
        // U+1F600 GRINNING FACE, 4 bytes in UTF-8
        expect_scope_stacks("\u{1F600}x", &["<source.test>, <test.good>"], syntax);
    }

    #[test]
    fn can_include_backrefs() {
        let syntax = SyntaxDefinition::load_from_str(r#"
                name: Backref Include Test
                scope: source.backrefinc
                contexts:
                  main:
                    - match: (a)
                      scope: a
                      push: context1
                  context1:
                    - include: context2
                  context2:
                    - match: \1
                      scope: b
                      pop: true
                "#, true, None).unwrap();

        expect_scope_stacks_with_syntax("aa", &["<a>", "<b>"], syntax);
    }
    
    #[test]
    fn can_include_nested_backrefs() {
        let syntax = SyntaxDefinition::load_from_str(r#"
                name: Backref Include Test
                scope: source.backrefinc
                contexts:
                  main:
                    - match: (a)
                      scope: a
                      push: context1
                  context1:
                    - include: context3
                  context3:
                    - include: context2
                  context2:
                    - match: \1
                      scope: b
                      pop: true
                "#, true, None).unwrap();

        expect_scope_stacks_with_syntax("aa", &["<a>", "<b>"], syntax);
    }

    fn expect_scope_stacks(line_without_newline: &str, expect: &[&str], syntax: &str) {
        println!("Parsing with newlines");
        let line_with_newline = format!("{}\n", line_without_newline);
        let syntax_newlines = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
        expect_scope_stacks_with_syntax(&line_with_newline, expect, syntax_newlines);

        println!("Parsing without newlines");
        let syntax_nonewlines = SyntaxDefinition::load_from_str(syntax, false, None).unwrap();
        expect_scope_stacks_with_syntax(line_without_newline, expect, syntax_nonewlines);
    }

    fn expect_scope_stacks_with_syntax(line: &str, expect: &[&str], syntax: SyntaxDefinition) {
        // check that each expected scope stack appears at least once while parsing the given test line

        let syntax_set = link(syntax);
        let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
        let ops = ops(&mut state, line, &syntax_set);
        expect_scope_stacks_for_ops(ops, expect);
    }

    fn expect_scope_stacks_for_ops(ops: Vec<(usize, ScopeStackOp)>, expect: &[&str]) {
        let mut criteria_met = Vec::new();
        for stack_str in stack_states(ops) {
            println!("{}", stack_str);
            for expectation in expect.iter() {
                if stack_str.contains(expectation) {
                    criteria_met.push(expectation);
                }
            }
        }
        if let Some(missing) = expect.iter().find(|e| !criteria_met.contains(e)) {
            panic!("expected scope stack '{}' missing", missing);
        }
    }

    fn parse(line: &str, syntax: &str) -> Vec<(usize, ScopeStackOp)> {
        let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
        let syntax_set = link(syntax);

        let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
        ops(&mut state, line, &syntax_set)
    }

    fn link(syntax: SyntaxDefinition) -> SyntaxSet {
        let mut builder = SyntaxSetBuilder::new();
        builder.add(syntax);
        builder.build()
    }

    fn ops(state: &mut ParseState, line: &str, syntax_set: &SyntaxSet) -> Vec<(usize, ScopeStackOp)> {
        let ops = state.parse_line(line, syntax_set).expect("#[cfg(test)]");
        debug_print_ops(line, &ops);
        ops
    }

    fn stack_states(ops: Vec<(usize, ScopeStackOp)>) -> Vec<String> {
        let mut states = Vec::new();
        let mut stack = ScopeStack::new();
        for &(_, ref op) in ops.iter() {
            stack.apply(op).expect("#[cfg(test)]");
            let scopes: Vec<String> = stack.as_slice().iter().map(|s| format!("{:?}", s)).collect();
            let stack_str = scopes.join(", ");
            states.push(stack_str);
        }
        states
    }
}