Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scanner for Scala lang #179

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
40 changes: 21 additions & 19 deletions lib/coderay/helpers/file_type.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module CodeRay

# = FileType
#
# A simple filetype recognizer.
Expand All @@ -8,18 +8,18 @@ module CodeRay
#
# # determine the type of the given
# lang = FileType[file_name]
#
#
# # return :text if the file type is unknown
# lang = FileType.fetch file_name, :text
#
#
# # try the shebang line, too
# lang = FileType.fetch file_name, :text, true
module FileType

UnknownFileType = Class.new Exception

class << self

# Try to determine the file type of the file.
#
# +filename+ is a relative or absolute path to a file.
Expand All @@ -30,7 +30,7 @@ def [] filename, read_shebang = false
name = File.basename filename
ext = File.extname(name).sub(/^\./, '') # from last dot, delete the leading dot
ext2 = filename.to_s[/\.(.*)/, 1] # from first dot

type =
TypeFromExt[ext] ||
TypeFromExt[ext.downcase] ||
Expand All @@ -39,10 +39,10 @@ def [] filename, read_shebang = false
TypeFromName[name] ||
TypeFromName[name.downcase]
type ||= type_from_shebang(filename) if read_shebang

type
end

# This works like Hash#fetch.
#
# If the filetype cannot be found, the +default+ value
Expand All @@ -51,7 +51,7 @@ def fetch filename, default = nil, read_shebang = false
if default && block_given?
warn 'Block supersedes default value argument; use either.'
end

if type = self[filename, read_shebang]
type
else
Expand All @@ -60,9 +60,9 @@ def fetch filename, default = nil, read_shebang = false
raise UnknownFileType, 'Could not determine type of %p.' % filename
end
end

protected

def type_from_shebang filename
return unless File.exist? filename
File.open filename, 'r' do |f|
Expand All @@ -73,9 +73,9 @@ def type_from_shebang filename
end
end
end

end

TypeFromExt = {
'c' => :c,
'cfc' => :xml,
Expand All @@ -86,7 +86,7 @@ def type_from_shebang filename
'dpr' => :delphi,
'erb' => :erb,
'gemspec' => :ruby,
'go' => :go,
'go' => :go,
'groovy' => :groovy,
'gvy' => :groovy,
'h' => :c,
Expand Down Expand Up @@ -120,6 +120,8 @@ def type_from_shebang filename
'ru' => :ruby, # config.ru
'rxml' => :ruby,
'sass' => :sass,
'sbt' => :scala,
'scala' => :scala,
'sql' => :sql,
'taskpaper' => :taskpaper,
'template' => :json, # AWS CloudFormation template
Expand All @@ -133,9 +135,9 @@ def type_from_shebang filename
# Map each of these extensions to the :cpp type.
%w[cc cpp cp cxx c++ C hh hpp h++ cu].each do |cpp_alias|
TypeFromExt[cpp_alias] = :cpp
end

TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/

TypeFromName = {
'Capfile' => :ruby,
'Rakefile' => :ruby,
Expand All @@ -145,7 +147,7 @@ def type_from_shebang filename
'Vagrantfile' => :ruby,
'Appraisals' => :ruby
}

end

end
171 changes: 171 additions & 0 deletions lib/coderay/scanners/scala.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
module CodeRay
module Scanners

  # Scanner for Scala.
  #
  # A hand-written lexer with two states: +:initial+ (ordinary code) and
  # +:string+ (inside a single- or double-quoted literal). Triple-quoted
  # raw strings are consumed in a single step from the +:initial+ state.
  class Scala < Scanner

    register_for :scala

    autoload :BuiltinTypes, CodeRay.coderay_path('scanners', 'scala', 'builtin_types')

    # NOTE(review): some words appear in more than one of the lists below
    # (e.g. 'class' in KEYWORDS and TYPES, 'abstract' in KEYWORDS and
    # DIRECTIVES). Which kind they end up with depends on how WordList#add
    # treats repeated keys - presumably the last list added wins; confirm.
    KEYWORDS = %w[
      abstract case catch class def do else extends
      finally for forSome if implicit import lazy match new
      object override package private protected
      return sealed throw trait try type
      val var while with yield
    ]  # :nodoc:
    RESERVED = %w[ const goto ]  # :nodoc:
    CONSTANTS = %w[ false null true ]  # :nodoc:
    MAGIC_VARIABLES = %w[ this super ]  # :nodoc:
    TYPES = %w[
      boolean byte char class double enum float int interface long
      short void
    ] << '[]'  # :nodoc: because int[] should be highlighted as a type
    DIRECTIVES = %w[
      abstract extends final implements native private protected public
      static strictfp synchronized throws transient volatile
    ]  # :nodoc:

    # Maps an identifier to its token kind; unknown words are :ident.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(RESERVED, :reserved).
      add(CONSTANTS, :predefined_constant).
      add(MAGIC_VARIABLES, :local_variable).
      add(TYPES, :type).
      add(BuiltinTypes::List, :predefined_type).
      add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
      add(DIRECTIVES, :directive)  # :nodoc:

    ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:
    # Unescaped run of characters allowed inside a string, keyed by the
    # delimiter that opened it.
    STRING_CONTENT_PATTERN = {
      "'" => /[^\\']+/,
      '"' => /[^\\"]+/,
      '/' => /[^\\\/]+/
    }  # :nodoc:
    IDENT = /[a-zA-Z_][A-Za-z_0-9]*/  # :nodoc:

  protected

    # Tokenizes the scanner's input and feeds the tokens to +encoder+.
    #
    # +encoder+ receives text_token / begin_group / end_group calls.
    # +options+ is accepted but not used by this method.
    #
    # Returns +encoder+.
    def scan_tokens encoder, options

      state = :initial
      string_delimiter = nil
      class_name_follows = false  # true right after class/trait/object/interface
      last_token_dot = false      # true when the previous non-space token was '.'

      until eos?

        case state

        when :initial

          # Whitespace and comments use +next+ so that they do not reset
          # last_token_dot at the bottom of the loop.
          if match = scan(/ \s+ | \\\n /x)
            encoder.text_token match, :space
            next

          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            encoder.text_token match, :comment
            next

          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            if last_token_dot
              # A member access such as foo.type is never a keyword.
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            else
              case match
              when 'class', 'interface', 'object', 'trait'
                # The next identifier names the definition being introduced.
                class_name_follows = true
              end
            end
            encoder.text_token match, kind

          elsif match = scan(/ \.(?!\d) | [,?:;()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
            encoder.text_token match, :operator

          elsif match = scan(/\{/)
            # A body starts here, so no class name can follow any more.
            class_name_follows = false
            encoder.text_token match, :operator

          elsif check(/[\d.]/)
            if match = scan(/0[xX][0-9A-Fa-f]+/)
              encoder.text_token match, :hex
            elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
              encoder.text_token match, :octal
            elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              encoder.text_token match, :float
            elsif match = scan(/\d+[lL]?/)
              encoder.text_token match, :integer
            end

          elsif match = scan(/"""/)
            # Triple-quoted raw string: no escapes, may span lines and may
            # contain single or double quotes. It ends at the first run of
            # three or more quotes; any quotes beyond the final three belong
            # to the content. Must be tested before the plain quote branch,
            # which would otherwise read the opener as an empty "" string.
            encoder.begin_group :string
            encoder.text_token match, :delimiter
            if raw_body = scan(/(?:"{0,2}[^"])+/m)
              encoder.text_token raw_body, :content
            end
            if closing = scan(/"{3,}/)
              encoder.text_token closing[0...-3], :content if closing.size > 3
              encoder.text_token closing[-3, 3], :delimiter
            elsif closing = scan(/"+/)
              # Unterminated literal ending in one or two quotes at EOS.
              encoder.text_token closing, :content
            end
            encoder.end_group :string

          elsif match = scan(/["']/)
            state = :string
            encoder.begin_group state
            string_delimiter = match
            encoder.text_token match, :delimiter

          elsif match = scan(/ @ #{IDENT} /ox)
            encoder.text_token match, :annotation

          else
            encoder.text_token getch, :error

          end

        when :string
          if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
            encoder.text_token match, :content
          elsif match = scan(/["'\/]/)
            # The content pattern swallows every non-delimiter quote, so a
            # quote seen here is the closing delimiter.
            encoder.text_token match, :delimiter
            encoder.end_group state
            state = :initial
            string_delimiter = nil
          elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
              encoder.text_token match, :content
            else
              encoder.text_token match, :char
            end
          elsif match = scan(/\\./m)
            encoder.text_token match, :content
          elsif match = scan(/ \\ | $ /x)
            encoder.end_group state
            state = :initial
            encoder.text_token match, :error unless match.empty?
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
          end

        else
          raise_inspect 'Unknown state', encoder

        end

        last_token_dot = match == '.'

      end

      # Close a string group left open by an unterminated literal.
      if state == :string
        encoder.end_group state
      end

      encoder
    end

  end

end
end