Skip to content

Commit

Permalink
Merge pull request #141 from rubychan/lua-scanner
Browse files Browse the repository at this point in the history
Lua scanner, tweaked (finally!)
  • Loading branch information
korny committed Jun 22, 2013
2 parents 546b489 + 90c401c commit 1e330f1
Show file tree
Hide file tree
Showing 6 changed files with 284 additions and 2 deletions.
2 changes: 2 additions & 0 deletions Changes.textile
Expand Up @@ -4,6 +4,7 @@ p=. _This files lists all changes in the CodeRay library since the 0.9.8 release

h2. Changes in 1.1

* New scanner: Lua [#21, #22, thanks to Quintus]
* New scanner: Sass [#93]
* New scanner: Taskpaper [#39, thanks to shimomura]
* Diff scanner: Highlight inline changes in multi-line changes [#99]
Expand All @@ -17,6 +18,7 @@ h2. Changes in 1.1
* @CodeRay::TokenKinds@ should not be frozen [#130, thanks to Gavin Kistner]
* New token type @:id@ for CSS/Sass [#27]
* New token type @:done@ for Taskpaper [#39]
* New token type @:map@ for Lua, introducing a nice nested-shades trick [#22, thanks to Quintus and nathany]
* Display line numbers in HTML @:table@ mode even for single-line code (remove special case) [#41, thanks to Ariejan de Vroom]
* Override Bootstrap's pre word-break setting for line numbers [#102, thanks to lightswitch05]
* Fixed @:docstring@ token type style
Expand Down
4 changes: 2 additions & 2 deletions lib/coderay/encoders/debug_lint.rb
Expand Up @@ -35,7 +35,7 @@ def begin_group kind
end

# Closes the token group +kind+ and verifies group nesting.
#
# Pops the most recently opened entry from @opened exactly once and compares
# it with +kind+. (Popping twice per call would discard an unrelated outer
# entry and make every correctly nested group fail.)
#
# Raises IncorrectTokenGroupNesting if the popped entry is not +kind+;
# the message shows what is left in @opened after the pop.
# Otherwise hands over to the superclass implementation.
def end_group kind
  raise IncorrectTokenGroupNesting, "We are inside #{@opened.inspect}, not #{kind} (end_group)" if @opened.pop != kind
  super
end

Expand All @@ -45,7 +45,7 @@ def begin_line kind
end

# Closes the token line +kind+ and verifies nesting.
#
# Pops the most recently opened entry from @opened exactly once and compares
# it with +kind+. (Popping twice per call would discard an unrelated outer
# entry and make every correctly nested line fail.)
#
# Raises IncorrectTokenGroupNesting if the popped entry is not +kind+;
# the message shows what is left in @opened after the pop.
# Otherwise hands over to the superclass implementation.
def end_line kind
  raise IncorrectTokenGroupNesting, "We are inside #{@opened.inspect}, not #{kind} (end_line)" if @opened.pop != kind
  super
end

Expand Down
1 change: 1 addition & 0 deletions lib/coderay/helpers/file_type.rb
Expand Up @@ -96,6 +96,7 @@ def shebang filename
'java' => :java,
'js' => :java_script,
'json' => :json,
'lua' => :lua,
'mab' => :ruby,
'pas' => :delphi,
'patch' => :diff,
Expand Down
275 changes: 275 additions & 0 deletions lib/coderay/scanners/lua.rb
@@ -0,0 +1,275 @@
# encoding: utf-8

module CodeRay
module Scanners

# Scanner for the Lua[http://lua.org] programming language.
#
# The language’s complete syntax is defined in
# {the Lua manual}[http://www.lua.org/manual/5.2/manual.html],
# which is what this scanner tries to conform to.
class Lua < Scanner

register_for :lua
file_extension 'lua'
title 'Lua'

# Reserved words of the Lua language.
# Frozen so the shared list cannot be modified by accident at runtime.
KEYWORDS = %w[
  and break do else elseif end
  for function goto if in
  local not or repeat return
  then until while
].freeze

# Constants set by the Lua core.
PREDEFINED_CONSTANTS = %w[false true nil].freeze

# Functions of Lua's `basic' library. Loading that library is not strictly
# required, but it is highly recommended, and a program that skips it has to
# supply its own implementations of some of these names. They are neither
# keywords nor constants, yet practically always available, so they are
# tagged as `predefined'.
#
# Names of the form `_UPPERCASE' are deliberately absent: the Lua manual
# reserves such identifiers, and the scanner recognises them by pattern
# instead of by list.
PREDEFINED_EXPRESSIONS = %w[
  assert collectgarbage dofile error getmetatable
  ipairs load loadfile next pairs pcall print
  rawequal rawget rawlen rawset select setmetatable
  tonumber tostring type xpcall
].freeze

# Word list used to pick the token kind of a scanned word. Each listed group
# of words is registered with its kind, in the order given below; the list
# itself is created with :ident (presumably the kind reported for any word
# that is not listed - confirm against CodeRay::WordList).
IDENT_KIND = [
  [KEYWORDS,               :keyword],
  [PREDEFINED_CONSTANTS,   :predefined_constant],
  [PREDEFINED_EXPRESSIONS, :predefined],
].inject(CodeRay::WordList.new(:ident)) do |word_list, (words, kind)|
  word_list.add(words, kind)
end

protected

# Scanner initialization.
#
# Sets the two instance variables scan_tokens relies on:
# * @state       - the state scanning starts in when no :state option is given.
# * @brace_depth - the number of currently open table braces `{'.
def setup
@state = :initial
@brace_depth = 0
end

# CodeRay entry hook. Scans the input to its end and reports every token to
# +encoder+.
#
# The scanner is a state machine. :initial handles ordinary code; the other
# states handle text whose meaning depends on what was seen before: the name
# after `function', the label after `goto', the names after `local', the
# body of long comments, long strings and quoted strings, and table
# constructors (:map).
#
# encoder:: receives #text_token(text, kind), #begin_group(kind) and
#           #end_group(kind) calls.
# options:: Hash. :state selects the start state (defaults to the state
#           remembered in @state). If :keep_state is set, the final state is
#           stored in @state and groups that are still open are left open.
#           Otherwise every group still open at the end of the input
#           (string, long comment, table) is closed and the brace counter is
#           reset, so the emitted groups are always balanced.
#
# Returns +encoder+. Raises RuntimeError when asked to run in an unknown
# state.
def scan_tokens(encoder, options)
  state = options[:state] || @state

  until eos?
    case state

    when :initial
      if match = scan(/\-\-\[\=*\[/) # --[[ long (possibly multiline) comment ]]
        @num_equals = match.count("=") # The closing delimiter must use the same number of `='
        encoder.begin_group(:comment)
        encoder.text_token(match, :delimiter)
        state = :long_comment

      elsif match = scan(/--.*$/) # -- Lua comment
        encoder.text_token(match, :comment)

      elsif match = scan(/\[=*\[/) # [[ long (possibly multiline) string ]]
        @num_equals = match.count("=") # The closing delimiter must use the same number of `='
        encoder.begin_group(:string)
        encoder.text_token(match, :delimiter)
        state = :long_string

      elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]*\s*::/) # ::goto_label:: (a single-character name is valid, too)
        encoder.text_token(match, :label)

      elsif match = scan(/_[A-Z]+\b/) # _UPPERCASE names are reserved for Lua; \b keeps e.g. `_ABCdef' in one piece
        encoder.text_token(match, :predefined)

      elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits)
        kind = IDENT_KIND[match]

        # Extra highlighting for entities following certain keywords
        if kind == :keyword and match == "function"
          state = :function_expected
        elsif kind == :keyword and match == "goto"
          state = :goto_label_expected
        elsif kind == :keyword and match == "local"
          state = :local_var_expected
        end

        encoder.text_token(match, kind)

      elsif match = scan(/\{/) # Opening table brace {
        encoder.begin_group(:map)
        encoder.text_token(match, @brace_depth >= 1 ? :inline_delimiter : :delimiter)
        @brace_depth += 1
        state = :map

      elsif match = scan(/\}/) # Closing table brace }
        if @brace_depth == 1
          @brace_depth = 0
          encoder.text_token(match, :delimiter)
          encoder.end_group(:map)
        elsif @brace_depth == 0 # Mismatched brace
          encoder.text_token(match, :error)
        else
          @brace_depth -= 1
          encoder.text_token(match, :inline_delimiter)
          encoder.end_group(:map)
          state = :map
        end

      elsif match = scan(/["']/) # String delimiters " and '
        encoder.begin_group(:string)
        encoder.text_token(match, :delimiter)
        @start_delim = match
        state = :string

      # Left alternative: hex number with prefix; right alternative: decimal number.
      # Hexadecimal constants have no E power, decimal ones no P power.
      elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix)
        encoder.text_token(match, :float)

      # Left alternative: hex number with prefix; right alternative: decimal number.
      # Hexadecimal constants have no E power, decimal ones no P power.
      elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix)
        encoder.text_token(match, :integer)

      elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) # Operators
        encoder.text_token(match, :operator)

      elsif match = scan(/\s+/) # Space
        encoder.text_token(match, :space)

      else # Invalid stuff. Note that Lua doesn't accept multibyte chars outside of strings, hence these are also errors.
        encoder.text_token(getch, :error)
      end

      # It may be that we're scanning a full-blown subexpression of a table
      # (tables can contain full expressions in parts).
      # If this is the case, return to :map scanning state.
      state = :map if state == :initial && @brace_depth >= 1

    when :function_expected
      if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name
        encoder.text_token(match, :operator)
        state = :initial
      elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator
        encoder.text_token(match, :ident)
      elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo()
        encoder.text_token(match, :function)
        state = :initial
      elsif match = scan(/\s+/) # Between the `function' keyword and the ident may be any amount of whitespace
        encoder.text_token(match, :space)
      else
        encoder.text_token(getch, :error)
        state = :initial
      end

    when :goto_label_expected
      if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
        encoder.text_token(match, :label)
        state = :initial
      elsif match = scan(/\s+/) # Between the `goto' keyword and the label may be any amount of whitespace
        encoder.text_token(match, :space)
      else
        # Not a label. Give up on it (as :function_expected does) so that a
        # later, unrelated identifier is not painted as a label.
        encoder.text_token(getch, :error)
        state = :initial
      end

    when :local_var_expected
      if match = scan(/function\b/) # local function ... (\b: `local functionality' declares a variable)
        encoder.text_token(match, :keyword)
        state = :function_expected
      elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
        encoder.text_token(match, :local_variable)
      elsif match = scan(/,/)
        encoder.text_token(match, :operator)
      elsif match = scan(/\=/)
        encoder.text_token(match, :operator)
        # After encountering the equal sign, arbitrary expressions are
        # allowed again, so just return to the main state for further
        # parsing.
        state = :initial
      elsif match = scan(/\n/)
        encoder.text_token(match, :space)
        state = :initial
      elsif match = scan(/\s+/)
        encoder.text_token(match, :space)
      else
        # Anything else (`;', a trailing `-- comment', ...) ends the list of
        # declared names. Nothing is consumed here; the :initial state
        # rescans the same position and always advances the scan pointer.
        state = :initial
      end

    when :long_comment
      if match = scan(/.*?(?=\]={#{@num_equals}}\])/m)
        encoder.text_token(match, :content)

        delim = scan(/\]={#{@num_equals}}\]/)
        encoder.text_token(delim, :delimiter)
      else # No terminator found till EOF
        encoder.text_token(rest, :error)
        terminate
      end
      encoder.end_group(:comment)
      state = :initial

    when :long_string
      if match = scan(/.*?(?=\]={#{@num_equals}}\])/m) # Long strings do not interpret any escape sequences
        encoder.text_token(match, :content)

        delim = scan(/\]={#{@num_equals}}\]/)
        encoder.text_token(delim, :delimiter)
      else # No terminator found till EOF
        encoder.text_token(rest, :error)
        terminate
      end
      encoder.end_group(:string)
      state = :initial

    when :string
      if match = scan(/[^\\#{@start_delim}\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceded by \ or \z)
        encoder.text_token(match, :content)
      elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m)
        encoder.text_token(match, :char)
      elsif match = scan(Regexp.compile(@start_delim))
        encoder.text_token(match, :delimiter)
        encoder.end_group(:string)
        state = :initial
      elsif match = scan(/\n/) # Lua forbids unescaped newlines in normal non-long strings
        encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings
        encoder.end_group(:string)
        state = :initial
      else
        encoder.text_token(getch, :error)
      end

    when :map
      if match = scan(/[,;]/)
        encoder.text_token(match, :operator)
      elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x)
        encoder.text_token(match, :key)
        encoder.text_token(scan(/\s+/), :space) if check(/\s+/)
        encoder.text_token(scan(/\=/), :operator)
        state = :initial
      elsif match = scan(/\s+/m)
        encoder.text_token(match, :space)
      else
        # Note this clause doesn't advance the scan pointer, it's a kind of
        # "retry with other options" (the :initial state then of course
        # advances the pointer).
        state = :initial
      end

    else
      raise "Unknown scanner state: #{state.inspect}"
    end

  end

  if options[:keep_state]
    @state = state
  else
    # The input is complete: close whatever it left open, innermost group
    # first, so the encoder always sees balanced groups.
    case state
    when :string, :long_string
      encoder.end_group(:string)
    when :long_comment
      encoder.end_group(:comment)
    end
    @brace_depth.times { encoder.end_group(:map) }
    @brace_depth = 0 # Do not leak open braces into the next scan
  end

  encoder
end

end

end
end
3 changes: 3 additions & 0 deletions lib/coderay/styles/alpha.rb
Expand Up @@ -99,6 +99,9 @@ class Alpha < Style
.keyword { color:#080; font-weight:bold }
.label { color:#970; font-weight:bold }
.local-variable { color:#963 }
.map .content { color:#808 }
.map .delimiter { color:#40A}
.map { background-color:hsla(200,100%,50%,0.06); }
.namespace { color:#707; font-weight:bold }
.octal { color:#40E }
.operator { }
Expand Down
1 change: 1 addition & 0 deletions lib/coderay/token_kinds.rb
Expand Up @@ -51,6 +51,7 @@ module CodeRay
:keyword => 'keyword', # reserved word that's actually implemented; most scanners
:label => 'label', # C, PHP
:local_variable => 'local-variable', # local and magic variables; some scanners
:map => 'map', # Lua tables
:modifier => 'modifier', # used inside on strings; lots of scanners
:namespace => 'namespace', # Clojure, Java, Taskpaper
:octal => 'octal', # lots of scanners
Expand Down

0 comments on commit 1e330f1

Please sign in to comment.