/
lexer.rb
62 lines (57 loc) · 1.5 KB
/
lexer.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# frozen_string_literal: true
require "strscan"
module Liquid
class Lexer
SPECIALS = {
'|' => :pipe,
'.' => :dot,
':' => :colon,
',' => :comma,
'[' => :open_square,
']' => :close_square,
'(' => :open_round,
')' => :close_round,
'?' => :question,
'-' => :dash,
}.freeze
IDENTIFIER = /[a-zA-Z_][\w-]*\??/
SINGLE_STRING_LITERAL = /'[^\']*'/
DOUBLE_STRING_LITERAL = /"[^\"]*"/
NUMBER_LITERAL = /-?\d+(\.\d+)?/
DOTDOT = /\.\./
COMPARISON_OPERATOR = /==|!=|<>|<=?|>=?|contains(?=\s)/
WHITESPACE_OR_NOTHING = /\s*/
def initialize(input)
@ss = StringScanner.new(input)
end
def tokenize
@output = []
until @ss.eos?
@ss.skip(WHITESPACE_OR_NOTHING)
break if @ss.eos?
tok = if (t = @ss.scan(COMPARISON_OPERATOR))
[:comparison, t]
elsif (t = @ss.scan(SINGLE_STRING_LITERAL))
[:string, t]
elsif (t = @ss.scan(DOUBLE_STRING_LITERAL))
[:string, t]
elsif (t = @ss.scan(NUMBER_LITERAL))
[:number, t]
elsif (t = @ss.scan(IDENTIFIER))
[:id, t]
elsif (t = @ss.scan(DOTDOT))
[:dotdot, t]
else
c = @ss.getch
if (s = SPECIALS[c])
[s, c]
else
raise SyntaxError, "Unexpected character #{c}"
end
end
@output << tok
end
@output << [:end_of_string]
end
end
end