Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tokenise error messages in Console lexer #1498

Merged
merged 2 commits into from Apr 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
90 changes: 53 additions & 37 deletions lib/rouge/lexers/console.rb
Expand Up @@ -9,12 +9,14 @@ module Lexers
# line before passing the remainder of the line to the language lexer for
# the shell (by default, the {Shell} lexer).
#
# The {ConsoleLexer} class accepts four options:
# The {ConsoleLexer} class accepts five options:
# 1. **lang**: the shell language to lex (default: `shell`);
# 2. **output**: the output language (default: `plaintext?token=Generic.Output`);
# 3. **prompt**: comma-separated list of strings that indicate the end of a
# prompt (default: `$,#,>,;`);
# 4. **comments**: whether to enable comments;
# 5. **error**: comma-separated list of strings that indicate the start of an
# error message.
#
# The comments option, if enabled, will lex lines that begin with a `#` as a
# comment. Please note that this option will only work if the prompt is
Expand All @@ -39,27 +41,38 @@ class ConsoleLexer < Lexer
# Lexer metadata: primary tag, alternate names, associated filename glob and a
# human-readable description.
# NOTE(review): presumably consumed by the Lexer base class for lookup and
# listing -- confirm against Lexer.
tag 'console'
aliases 'terminal', 'shell_session', 'shell-session'
filenames '*.cap'
desc 'A generic lexer for shell sessions. Accepts ?lang and ?output lexer options, a ?prompt option, and ?comments to enable # comments.'
desc 'A generic lexer for shell sessions. Accepts ?lang and ?output lexer options, a ?prompt option, ?comments to enable # comments, and ?error to handle error messages.'

# Options accepted by this lexer; each is declared with a description string
# that states its default where one exists.
option :lang, 'the shell language to lex (default: shell)'
option :output, 'the output language (default: plaintext?token=Generic.Output)'
option :prompt, 'comma-separated list of strings that indicate the end of a prompt. (default: $,#,>,;)'
option :comments, 'enable hash-comments at the start of a line - otherwise interpreted as a prompt. (default: false, implied by ?prompt not containing `#`)'
option :error, 'comma-separated list of strings that indicate the start of an error message'

# Reads the lexer options into instance variables after delegating to the
# superclass initializer (a bare `super` forwards the original arguments).
# Each block is handed to the option helper as its fallback; the helpers
# themselves (list_option, lexer_option, bool_option) are defined outside this
# view, so how they coerce values is not documented here.
def initialize(*)
super
@prompt = list_option(:prompt) { nil }
@lang = lexer_option(:lang) { 'shell' }
@output = lexer_option(:output) { PlainText.new(token: Generic::Output) }
# :guess is a placeholder that allow_comments? later resolves from the prompt
# setting.
@comments = bool_option(:comments) { :guess }
@error = list_option(:error) { nil }
end

def prompt_regex
@prompt_regex ||= begin
/^#{prompt_prefix_regex}(?:#{end_chars.map(&Regexp.method(:escape)).join('|')})/
# Decides whether lines starting with `#` are lexed as comments.
#
# An explicit @comments setting is returned unchanged. When @comments is the
# :guess placeholder, comments are enabled only if a prompt was supplied, it is
# non-empty, and `#` is not one of the prompt's end characters.
def allow_comments?
  return @comments unless @comments == :guess

  custom_prompt = @prompt && !@prompt.empty?
  custom_prompt && !end_chars.include?('#')
end

# Regex for a comment line: optional leading whitespace (matched lazily)
# followed by `#`, anchored at the very start of the string.
def comment_regex
/\A\s*?#/
end

def end_chars
@end_chars ||= if @prompt.any?
@prompt.reject { |c| c.empty? }
Expand All @@ -70,22 +83,9 @@ def end_chars
end
end

# Decides whether lines starting with `#` are lexed as comments.
#
# An explicit @comments setting is returned unchanged. When @comments is the
# :guess placeholder, comments are enabled only if a prompt was supplied, it is
# non-empty, and `#` is not one of the prompt's end characters.
def allow_comments?
  return @comments unless @comments == :guess

  custom_prompt = @prompt && !@prompt.empty?
  custom_prompt && !end_chars.include?('#')
end

def prompt_prefix_regex
if allow_comments?
/[^<#]*?/m
else
/.*?/m
# Builds (and memoizes) the regex that recognises an error message: any of the
# configured @error strings, escaped literally, at the start of a line.
#
# Empty entries are discarded first (mirroring how end_chars filters the prompt
# list). Otherwise a stray comma such as `error: 'a,,b'` would add an empty
# alternative that matches every line and turn all output into errors.
#
# Returns nil when no usable error prefix is configured; `nil =~ str` is nil,
# so callers that test `error_regex =~ line` simply never match.
def error_regex
  @error_regex ||= begin
    prefixes = Array(@error).reject(&:empty?)
    unless prefixes.empty?
      /^(?:#{prefixes.map { |prefix| Regexp.escape(prefix) }.join('|')})/
    end
  end
end

Expand All @@ -102,6 +102,10 @@ def lang_lexer
end
end

# Regex for one logical line: lazily consumes characters up to and including
# the next newline (or up to an end-of-line position). A backslash and the
# character after it are consumed as a pair, and under /m that character may
# be a newline, so a backslash-newline does not end the match.
def line_regex
/(\\.|[^\\])*?(\n|$)/m
end

def output_lexer
@output_lexer ||= case @output
when nil
Expand All @@ -115,22 +119,6 @@ def output_lexer
end
end

# Regex for one logical line: lazily consumes characters up to and including
# the next newline (or up to an end-of-line position). A backslash and the
# character after it are consumed as a pair, and under /m that character may
# be a newline, so a backslash-newline does not end the match.
def line_regex
/(\\.|[^\\])*?(\n|$)/m
end

# Regex for a comment line: optional leading whitespace (matched lazily)
# followed by `#`, anchored at the very start of the string.
def comment_regex
/\A\s*?#/
end

# Lexes +input+ line by line.
#
# Wraps the input in a StringScanner, resets the language and output
# sub-lexers, then calls process_line repeatedly until the scanner is
# exhausted. The given block is forwarded to process_line on every call.
def stream_tokens(input, &output)
  scanner = StringScanner.new(input)

  lang_lexer.reset!
  output_lexer.reset!

  process_line(scanner, &output) until scanner.eos?
end

def process_line(input, &output)
input.scan(line_regex)

Expand Down Expand Up @@ -162,13 +150,41 @@ def process_line(input, &output)
lang_lexer.reset!

yield Comment, input[0]
elsif error_regex =~ input[0]
puts "console: matched error #{input[0].inspect}" if @debug
output_lexer.reset!
lang_lexer.reset!

yield Generic::Error, input[0]
else
puts "console: matched output #{input[0].inspect}" if @debug
lang_lexer.reset!

output_lexer.continue_lex(input[0], &output)
end
end

# Regex fragment for the text that may precede a prompt's end character.
#
# When comments are allowed the prefix may not contain `<` or `#`; otherwise
# any characters are accepted. Both variants are lazy and use /m.
def prompt_prefix_regex
  allow_comments? ? /[^<#]*?/m : /.*?/m
end

# Regex that recognises a prompt: from the start of a line, the prefix
# fragment from prompt_prefix_regex followed by any one of the end_chars
# strings (escaped so they match literally). The result is memoized in
# @prompt_regex.
def prompt_regex
  return @prompt_regex if @prompt_regex

  prefix = prompt_prefix_regex
  terminators = end_chars.map { |chars| Regexp.escape(chars) }.join('|')
  @prompt_regex = /^#{prefix}(?:#{terminators})/
end

# Lexes +input+ line by line.
#
# Wraps the input in a StringScanner, resets the language and output
# sub-lexers, then calls process_line repeatedly until the scanner is
# exhausted. The given block is forwarded to process_line on every call.
def stream_tokens(input, &output)
  scanner = StringScanner.new(input)

  lang_lexer.reset!
  output_lexer.reset!

  process_line(scanner, &output) until scanner.eos?
end
end
end
end
10 changes: 10 additions & 0 deletions spec/lexers/console_spec.rb
Expand Up @@ -22,6 +22,16 @@
['Text', 'foo']
end

it 'parses a custom error' do
  lexer = klass.new({ error: 'No command,Unhandled' })

  # Lines starting with either configured prefix become a single error token.
  [
    'No command \'foo\' found, did you mean:',
    'Unhandled condition in test.lisp'
  ].each do |message|
    assert_tokens_equal message, lexer,
                        ['Generic.Error', message]
  end

  # A line without a configured prefix is still plain output.
  assert_tokens_equal 'foo', lexer,
                      ['Generic.Output', 'foo']
end

it 'parses single-line comments' do
subject_with_options = klass.new({ comments: true })
assert_tokens_equal '# this is a comment', subject_with_options,
Expand Down