From 5944ff9442b1ccdc467a811693f804236302a180 Mon Sep 17 00:00:00 2001 From: Martin Nordholts Date: Tue, 1 Mar 2022 06:58:36 +0100 Subject: [PATCH 1/2] Fall back to 'Plain Text' if a referenced syntax is missing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The consequence of this change is that if `syntect` encounters a syntax that references another syntax that is not present, `syntect` will fallback to Plain Text syntax instead of erroring/panicking. Falling back to Plain Text in cases like this seems to be what Sublime Text is doing too. For example, `bat` has a syntax for Vue, but Vue references a syntax with scope `text.pug`, which is not included in bat (by default). So if `bat` encounters the following `vue-with-pug.vue` file: ``` ``` `bat` currently panics with: ``` % bat ~/Desktop/vue-with-pug.vue ───────┬──────────────────────────────────────────────────────────────── │ File: /Users/martin/Desktop/vue-with-pug.vue │ Size: 140 B ───────┼──────────────────────────────────────────────────────────────── thread 'main' panicked at 'Can only call resolve on linked references: ByScope { scope: , sub_context: None }', /Users/martin/.cargo/registry/src/github.com-1ecc6299db9ec823/syntect-4.6.0/src/parsing/syntax_definition.rs:191:18 note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace ``` With this change, instead of panicking, `bat` will print the file with the pug parts highlighted as Plain Text. --- src/parsing/syntax_definition.rs | 7 +++ src/parsing/syntax_set.rs | 90 +++++++++++++++++++++++++++++--- src/parsing/yaml_load.rs | 41 ++++++++++++--- 3 files changed, 123 insertions(+), 15 deletions(-) diff --git a/src/parsing/syntax_definition.rs b/src/parsing/syntax_definition.rs index cba6808f..7690dc76 100644 --- a/src/parsing/syntax_definition.rs +++ b/src/parsing/syntax_definition.rs @@ -111,11 +111,18 @@ pub enum ContextReference { ByScope { scope: Scope, sub_context: Option, + /// `true` if this reference by scope is part of an `embed` for which + /// there is an `escape`. In other words a reference for a context for + /// which there "always is a way out". Enables falling back to `Plain + /// Text` syntax in case the referenced scope is missing. + with_escape: bool, }, #[non_exhaustive] File { name: String, sub_context: Option, + /// Same semantics as for [`Self::ByScope::with_escape`]. + with_escape: bool, }, #[non_exhaustive] Inline(String), diff --git a/src/parsing/syntax_set.rs b/src/parsing/syntax_set.rs index 9dabb04f..90c3c865 100644 --- a/src/parsing/syntax_set.rs +++ b/src/parsing/syntax_set.rs @@ -749,15 +749,15 @@ impl SyntaxSetBuilder { all_context_ids[syntax_index].get(s) } } - ByScope { scope, ref sub_context } => { - Self::find_id(sub_context, all_context_ids, syntaxes, |index_and_syntax| { + ByScope { scope, ref sub_context, with_escape } => { + Self::with_plain_text_fallback(all_context_ids, syntaxes, with_escape, Self::find_id(sub_context, all_context_ids, syntaxes, |index_and_syntax| { index_and_syntax.1.scope == scope - }) + })) } - File { ref name, ref sub_context } => { - Self::find_id(sub_context, all_context_ids, syntaxes, |index_and_syntax| { + File { ref name, ref sub_context, with_escape } => { + Self::with_plain_text_fallback(all_context_ids, syntaxes, with_escape, Self::find_id(sub_context, all_context_ids, syntaxes, |index_and_syntax| { &index_and_syntax.1.name == name - }) + })) } Direct(_) => None, }; @@ -767,6 +767,32 @@ impl SyntaxSetBuilder { } } + fn with_plain_text_fallback<'a>( + all_context_ids: &'a [HashMap], + syntaxes: &'a [SyntaxReference], + with_escape: bool, + context_id: Option<&'a ContextId>, + ) -> Option<&'a ContextId> { + context_id.or_else(|| { + if with_escape { + // If we keep this reference unresolved, syntect will crash + // when it encounters the reference. Rather than crashing, + // we instead fall back to "Plain Text". This seems to be + // how Sublime Text behaves. It should be a safe thing to do + // since `embed`s always includes an `escape` to get out of + // the `embed`. + Self::find_id( + &None, + all_context_ids, + syntaxes, + |index_and_syntax| index_and_syntax.1.name == "Plain Text", + ) + } else { + None + } + }) + } + fn find_id<'a>( sub_context: &Option, all_context_ids: &'a [HashMap], @@ -954,6 +980,56 @@ mod tests { assert_ops_contain(&ops, &expected); } + #[test] + fn falls_back_to_plain_text_when_embedded_scope_is_missing() { + test_plain_text_fallback(r#" + name: Z + scope: source.z + file_extensions: [z] + contexts: + main: + - match: 'z' + scope: z + - match: 'go_x' + embed: scope:does.not.exist + escape: 'leave_x' + "#); + } + + #[test] + fn falls_back_to_plain_text_when_embedded_file_is_missing() { + test_plain_text_fallback(r#" + name: Z + scope: source.z + file_extensions: [z] + contexts: + main: + - match: 'z' + scope: z + - match: 'go_x' + embed: DoesNotExist.sublime-syntax + escape: 'leave_x' + "#); + } + + fn test_plain_text_fallback(syntax_definition: &str) { + let syntax = + SyntaxDefinition::load_from_str(syntax_definition, true, None).unwrap(); + + let mut builder = SyntaxSetBuilder::new(); + builder.add_plain_text_syntax(); + builder.add(syntax); + let syntax_set = builder.build(); + + let syntax = syntax_set.find_syntax_by_extension("z").unwrap(); + let mut parse_state = ParseState::new(syntax); + let ops = parse_state.parse_line("z go_x x leave_x z", &syntax_set); + let expected_text_plain = (6, ScopeStackOp::Push(Scope::new("text.plain").unwrap())); + assert_ops_contain(&ops, &expected_text_plain); + let expected_text_plain_pop = (9, ScopeStackOp::Pop(1)); + assert_ops_contain(&ops, &expected_text_plain_pop); + } + #[test] fn can_find_unlinked_contexts() { let syntax_set = { @@ -974,7 +1050,7 @@ mod tests { let unlinked_contexts : Vec = syntax_set.find_unlinked_contexts().into_iter().collect(); assert_eq!(unlinked_contexts.len(), 1); - assert_eq!(unlinked_contexts[0], "Syntax 'A' with scope 'source.a' has unresolved context reference ByScope { scope: , sub_context: Some(\"main\") }"); + assert_eq!(unlinked_contexts[0], "Syntax 'A' with scope 'source.a' has unresolved context reference ByScope { scope: , sub_context: Some(\"main\"), with_escape: false }"); } #[test] diff --git a/src/parsing/yaml_load.rs b/src/parsing/yaml_load.rs index 32146183..868a63f3 100644 --- a/src/parsing/yaml_load.rs +++ b/src/parsing/yaml_load.rs @@ -207,7 +207,7 @@ impl SyntaxDefinition { if !is_special { if let Ok(x) = get_key(map, "include", Some) { let reference = SyntaxDefinition::parse_reference( - x, state, contexts, namer)?; + x, state, contexts, namer, false)?; context.patterns.push(Pattern::Include(reference)); } else { let pattern = SyntaxDefinition::parse_match_pattern( @@ -228,7 +228,8 @@ impl SyntaxDefinition { fn parse_reference(y: &Yaml, state: &mut ParserState<'_>, contexts: &mut HashMap, - namer: &mut ContextNamer) + namer: &mut ContextNamer, + with_escape: bool) -> Result { if let Some(s) = y.as_str() { let parts: Vec<&str> = s.split('#').collect(); @@ -243,6 +244,7 @@ impl SyntaxDefinition { .build(&parts[0][6..]) .map_err(ParseSyntaxError::InvalidScope)?, sub_context, + with_escape, }) } else if parts[0].ends_with(".sublime-syntax") { let stem = Path::new(parts[0]) @@ -252,6 +254,7 @@ impl SyntaxDefinition { Ok(ContextReference::File { name: stem.to_owned(), sub_context, + with_escape, }) } else { Ok(ContextReference::Named(parts[0].to_owned())) @@ -320,7 +323,7 @@ impl SyntaxDefinition { namer, )?; MatchOperation::Push(vec![ContextReference::Inline(escape_context), - SyntaxDefinition::parse_reference(y, state, contexts, namer)?]) + SyntaxDefinition::parse_reference(y, state, contexts, namer, true)?]) } else { return Err(ParseSyntaxError::MissingMandatoryKey("escape")); } @@ -375,10 +378,10 @@ impl SyntaxDefinition { y.as_vec() .unwrap() .iter() - .map(|x| SyntaxDefinition::parse_reference(x, state, contexts, namer)) + .map(|x| SyntaxDefinition::parse_reference(x, state, contexts, namer, false)) .collect() } else { - let reference = SyntaxDefinition::parse_reference(y, state, contexts, namer)?; + let reference = SyntaxDefinition::parse_reference(y, state, contexts, namer, false)?; Ok(vec![reference]) } } @@ -912,10 +915,15 @@ mod tests { // this is sadly necessary because Context is not Eq because of the Regex let expected = MatchOperation::Push(vec![ Named("string".to_owned()), - ByScope { scope: Scope::new("source.c").unwrap(), sub_context: Some("main".to_owned()) }, + ByScope { + scope: Scope::new("source.c").unwrap(), + sub_context: Some("main".to_owned()), + with_escape: false, + }, File { name: "CSS".to_owned(), - sub_context: Some("rule-list-body".to_owned()) + sub_context: Some("rule-list-body".to_owned()), + with_escape: false, }, ]); assert_eq!(format!("{:?}", match_pat.operation), @@ -952,7 +960,7 @@ mod tests { pop: true "#,false, None).unwrap(); - let def_with_embed = SyntaxDefinition::load_from_str(r#" + let mut def_with_embed = SyntaxDefinition::load_from_str(r#" name: C scope: source.c file_extensions: [c, h] @@ -968,6 +976,23 @@ mod tests { escape: (?i)(?= Date: Sat, 12 Mar 2022 09:52:08 +0100 Subject: [PATCH 2/2] Assert more strictly in test_plain_text_fallback() --- src/parsing/syntax_set.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/parsing/syntax_set.rs b/src/parsing/syntax_set.rs index 90c3c865..8cab17df 100644 --- a/src/parsing/syntax_set.rs +++ b/src/parsing/syntax_set.rs @@ -1024,10 +1024,16 @@ mod tests { let syntax = syntax_set.find_syntax_by_extension("z").unwrap(); let mut parse_state = ParseState::new(syntax); let ops = parse_state.parse_line("z go_x x leave_x z", &syntax_set); - let expected_text_plain = (6, ScopeStackOp::Push(Scope::new("text.plain").unwrap())); - assert_ops_contain(&ops, &expected_text_plain); - let expected_text_plain_pop = (9, ScopeStackOp::Pop(1)); - assert_ops_contain(&ops, &expected_text_plain_pop); + let expected_ops = vec![ + (0, ScopeStackOp::Push(Scope::new("source.z").unwrap())), + (0, ScopeStackOp::Push(Scope::new("z").unwrap())), + (1, ScopeStackOp::Pop(1)), + (6, ScopeStackOp::Push(Scope::new("text.plain").unwrap())), + (9, ScopeStackOp::Pop(1)), + (17, ScopeStackOp::Push(Scope::new("z").unwrap())), + (18, ScopeStackOp::Pop(1)), + ]; + assert_eq!(ops, expected_ops); } #[test]