From 8ffc50bc70e8eaea2993e449807c34b73d77ce5c Mon Sep 17 00:00:00 2001 From: John Fairhurst Date: Sun, 13 Oct 2019 23:57:11 +0100 Subject: [PATCH] Handle Guesser::Ambiguous in Markdown context (#1349) If more than one lexer is selected when guessing the language of provided code, `Lexer.find_fancy` will raise a `Guesser::Ambiguous` error. When lexing in the context of Markdown, this commit causes Rouge to instead select the first of the candidates. --- lib/rouge/lexer.rb | 3 +++ lib/rouge/lexers/markdown.rb | 8 +++++++- lib/rouge/plugins/redcarpet.rb | 8 +++++++- spec/lexers/markdown_spec.rb | 8 ++++++++ spec/plugins/redcarpet_spec.rb | 10 ++++++++++ 5 files changed, 35 insertions(+), 2 deletions(-) diff --git a/lib/rouge/lexer.rb b/lib/rouge/lexer.rb index 5f4160dbe0..77445fb511 100644 --- a/lib/rouge/lexer.rb +++ b/lib/rouge/lexer.rb @@ -49,6 +49,9 @@ def find(name) # # Lexer.find_fancy('guess', "#!/bin/bash\necho Hello, world") # + # If the code matches more than one lexer then Guesser::Ambiguous + # is raised. + # # This is used in the Redcarpet plugin as well as Rouge's own # markdown lexer for highlighting internal code blocks. # diff --git a/lib/rouge/lexers/markdown.rb b/lib/rouge/lexers/markdown.rb index d8fb6d29d1..4feb08bc8f 100644 --- a/lib/rouge/lexers/markdown.rb +++ b/lib/rouge/lexers/markdown.rb @@ -34,7 +34,13 @@ def html rule %r/^([ \t]*)(```|~~~)([^\n]*\n)((.*?)(\2))?/m do |m| name = m[3].strip - sublexer = Lexer.find_fancy(name.empty? ? "guess" : name, m[5], @options) + sublexer = + begin + Lexer.find_fancy(name.empty? ? "guess" : name, m[5], @options) + rescue Guesser::Ambiguous => e + e.alternatives.first.new(@options) + end + sublexer ||= PlainText.new(@options.merge(:token => Str::Backtick)) sublexer.reset! 
diff --git a/lib/rouge/plugins/redcarpet.rb b/lib/rouge/plugins/redcarpet.rb index f552acacea..2558e1b603 100644 --- a/lib/rouge/plugins/redcarpet.rb +++ b/lib/rouge/plugins/redcarpet.rb @@ -9,7 +9,13 @@ module Rouge module Plugins module Redcarpet def block_code(code, language) - lexer = Lexer.find_fancy(language, code) || Lexers::PlainText + lexer = + begin + Lexer.find_fancy(language, code) + rescue Guesser::Ambiguous => e + e.alternatives.first + end + lexer ||= Lexers::PlainText # XXX HACK: Redcarpet strips hard tabs out of code blocks, # so we assume you're not using leading spaces that aren't tabs, diff --git a/spec/lexers/markdown_spec.rb b/spec/lexers/markdown_spec.rb index 33a67e913f..660c0d9e3d 100644 --- a/spec/lexers/markdown_spec.rb +++ b/spec/lexers/markdown_spec.rb @@ -39,6 +39,14 @@ assert_has_token("Comment.Single","```\n#!/usr/bin/env ruby\n```\n") end + it 'picks a sub-lexer when the code-block-content is ambiguous' do + source = "Index: ): Awaitable<\n" + assert_raises Rouge::Guesser::Ambiguous do + Rouge::Lexer.find_fancy(nil, source) + end + assert_no_errors "```\n#{source}```\n" + end + it 'recognizes backticks instead of code block if inside string' do assert_has_token("Literal.String.Backtick","\nx```ruby\nfoo\n```\n") deny_has_token("Name.Label","\nx```ruby\nfoo\n```\n") diff --git a/spec/plugins/redcarpet_spec.rb b/spec/plugins/redcarpet_spec.rb index 2b0e9eb690..e5116a4f68 100644 --- a/spec/plugins/redcarpet_spec.rb +++ b/spec/plugins/redcarpet_spec.rb @@ -42,6 +42,16 @@ assert { result.include?(%(
)) }
   end
 
+  it 'chooses when a guess is ambiguous' do
+    result = markdown.render <<-mkd
+``` guess
+Index: ): Awaitable<
+```
+    mkd
+
+    assert { result.include?(%(