-
Notifications
You must be signed in to change notification settings - Fork 728
/
disambiguation.rb
158 lines (120 loc) · 3.86 KB
/
disambiguation.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# frozen_string_literal: true
module Rouge
module Guessers
class Disambiguation < Guesser
include Util
include Lexers
# Sets up a guesser for one file.
#
# @param filename name or path of the file; only File.basename of it is kept
# @param source the source, stored as given and later passed to get_source
#   by #filter
def initialize(filename, source)
  @filename, @source = File.basename(filename), source
end
# Narrows a list of candidate lexers using the registered disambiguators.
#
# The list is handed back untouched when it holds exactly one entry or when
# it is as long as Lexer.all. Otherwise every disambiguator whose patterns
# match the stored filename is asked to decide, in registration order, and
# the first truthy decision wins. When nothing decides, the original list is
# returned.
#
# @param lexers [Array] candidate lexers
# @return [Array] the decided lexers, or +lexers+ itself
def filter(lexers)
  candidate_count = lexers.size
  return lexers if candidate_count == 1 || candidate_count == Lexer.all.size

  # Decider blocks read this through #contains? and #matches?.
  @analyzer = TextAnalyzer.new(get_source(@source))

  self.class.disambiguators.each do |rule|
    if rule.match?(@filename)
      decision = rule.decide!(self)
      return decision if decision
    end
  end

  lexers
end
# Reports whether the analyzed source includes +text+.
# Reads @analyzer, which #filter assigns before any decider block runs.
#
# @param text substring to look for
# @return the result of @analyzer.include?(text)
def contains?(text)
  @analyzer.include?(text)
end
# Reports whether the analyzed source matches +re+ anywhere.
# Reads @analyzer, which #filter assigns before any decider block runs.
#
# @param re pattern applied with =~
# @return [Boolean] strictly true or false, never the match offset
def matches?(re)
  (@analyzer =~ re) ? true : false
end
# Registry of rules, kept in the order they were declared.
@disambiguators = []

class << self
  # @return [Array] the rules registered so far
  attr_reader :disambiguators

  # Registers a rule: the given patterns plus the block that decides.
  #
  # @param patterns filename patterns the rule applies to
  # @param decider block stored in a new Disambiguator
  # @return [Array] the registry, after appending
  def disambiguate(*patterns, &decider)
    @disambiguators << Disambiguator.new(patterns, &decider)
  end
end
# One disambiguation rule: a list of filename patterns and the block that
# chooses a lexer when one of them applies.
class Disambiguator
  include Util

  # @param patterns [Array] patterns checked against a filename by #match?
  # @param decider block later evaluated by #decide!
  def initialize(patterns, &decider)
    @patterns, @decider = patterns, decider
  end

  # Runs the decider block with +guesser+ as self and normalizes its value.
  #
  # @param guesser object the block is instance_eval'd against
  # @return [Array, nil] nil when the block yields nil, an Array result
  #   unchanged, and any other value wrapped in a one-element Array
  def decide!(guesser)
    verdict = guesser.instance_eval(&@decider)
    return verdict if verdict.nil? || verdict.is_a?(Array)

    [verdict]
  end

  # @param filename name handed to test_glob together with each pattern
  # @return [Boolean] true as soon as test_glob accepts one pattern
  def match?(filename)
    @patterns.each { |glob| return true if test_glob(glob, filename) }
    false
  end
end
# Rule table. Each call names the filename pattern(s) a rule applies to; the
# block runs with the guesser as self (Disambiguator#decide! uses
# instance_eval), so contains? and matches? inspect the analyzed source.
# Checks run top to bottom and the first `next` that fires supplies the
# block's value. A block whose checks all fail and that has no trailing
# default evaluates to nil.
#
# '*.pl': Perl if the source contains 'my $'; Prolog if it contains ':-' or
# opens with a word, zero or more "(word, word)" groups, and a period.
disambiguate '*.pl' do
next Perl if contains?('my $')
next Prolog if contains?(':-')
next Prolog if matches?(/\A\w+(\(\w+\,\s*\w+\))*\./)
end
# '*.h': ObjectiveC on @end/@implementation/@protocol/@property or a '@"'
# literal prefix; Cpp when a line starts (after optional whitespace) with one
# of the listed keywords; otherwise C.
disambiguate '*.h' do
next ObjectiveC if matches?(/@(end|implementation|protocol|property)\b/)
next ObjectiveC if contains?('@"')
next Cpp if matches?(/^\s*(?:catch|class|constexpr|namespace|private|
protected|public|template|throw|try|using)\b/x)
C
end
# '*.m': ObjectiveC on the same two checks as '*.h'; Mathematica on '(*' or
# ':='; Mason on a named '<%...>' tag from the listed set; Matlab when a line
# starts (after optional whitespace) with '%'; Mason again on '<%', '</%' or
# '<&'. There is no default, so the block yields nil when nothing matches.
disambiguate '*.m' do
next ObjectiveC if matches?(/@(end|implementation|protocol|property)\b/)
next ObjectiveC if contains?('@"')
next Mathematica if contains?('(*')
next Mathematica if contains?(':=')
next Mason if matches?(/<%(def|method|text|doc|args|flags|attr|init|once|shared|perl|cleanup|filter)([^>]*)(>)/)
next Matlab if matches?(/^\s*?%/)
next Mason if matches? %r!(</?%|<&)!
end
# '*.php': unconditional choice, no content checks.
disambiguate '*.php' do
# PHP always takes precedence over Hack
PHP
end
# '*.hh': Cpp when a line starts (after optional whitespace) with '#include';
# Hack when a line starts with '<?hh' or '$$' follows '(' or ','; otherwise Cpp.
disambiguate '*.hh' do
next Cpp if matches?(/^\s*#include/)
next Hack if matches?(/^<\?hh/)
next Hack if matches?(/(\(|, ?)\$\$/)
Cpp
end
# '*.plist': XML when the source begins with '<?xml'; otherwise Plist.
disambiguate '*.plist' do
next XML if matches?(/\A<\?xml\b/)
Plist
end
# '*.sc': Python when a line starts with '#'; SuperCollider when a line starts
# with '~' or ends with ';'; otherwise Python.
disambiguate '*.sc' do
next Python if matches?(/^#/)
next SuperCollider if matches?(/(?:^~|;$)/)
next Python
end
# 'Messages': MsgTrans when a line starts with two colon-separated runs of
# characters that are neither whitespace nor ':'; otherwise PlainText.
disambiguate 'Messages' do
next MsgTrans if matches?(/^[^\s:]+:[^\s:]+/)
next PlainText
end
# '*.cls': TeX when the first non-whitespace character is '\' or '%';
# OpenEdge on any of the listed markers, matched case-insensitively;
# otherwise Apex.
disambiguate '*.cls' do
next TeX if matches?(/\A\s*(?:\\|%)/)
next OpenEdge if matches?(/(no\-undo|BLOCK\-LEVEL|ROUTINE\-LEVEL|&ANALYZE\-SUSPEND)/i)
next Apex
end
# '*.pp': Puppet on a lowercase-initial identifier followed by '::'; Pascal
# when a line starts with function/begin/var, or on 'end;' / 'end.';
# otherwise Puppet.
disambiguate '*.pp' do
next Puppet if matches?(/(::)?([a-z]\w*::)/)
next Pascal if matches?(/^(function|begin|var)\b/)
next Pascal if matches?(/\b(end(;|\.))/)
Puppet
end
# '*.p': Prolog on the same two checks used for '*.pl'; otherwise OpenEdge.
disambiguate '*.p' do
next Prolog if contains?(':-')
next Prolog if matches?(/\A\w+(\(\w+\,\s*\w+\))*\./)
next OpenEdge
end
# '*.s', '*.S': GnuAsm on a .global/.extern/.type/.text directive or a
# '%'-prefixed register name built from r|e plus ax..sp; otherwise ArmAsm.
disambiguate '*.s', '*.S' do
next GnuAsm if matches?(/\s*\.(global|extern|type|text)/)
next GnuAsm if matches?(/%(r|e)(ax|bx|cx|dx|si|di|bp|sp)/)
ArmAsm
end
end
end
end