lexer_spec.rb
# frozen_string_literal: true
require "spec_helper"

describe GraphQL::Language::Lexer do
  subject { GraphQL::Language::Lexer }

  describe ".tokenize" do
    let(:query_string) {%|
      {
        query getCheese {
          cheese(id: 1) {
            ... cheeseFields
          }
        }
      }
    |}
    let(:tokens) { subject.tokenize(query_string) }
it "makes utf-8 comments" do
tokens = subject.tokenize("# 不要!\n{")
comment_token = tokens.first.prev_token
assert_equal "# 不要!", comment_token.to_s
end
it "keeps track of previous_token" do
assert_equal tokens[0], tokens[1].prev_token
end
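
    # In the single-quoted Ruby literal below, each \\ is one backslash, so the
    # GraphQL source is "a\"b""c": two adjacent strings with values a"b and c.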
it "allows escaped quotes in strings" do
tokens = subject.tokenize('"a\\"b""c"')
assert_equal 'a"b', tokens[0].value
assert_equal 'c', tokens[1].value
end
it "handles escaped backslashes before escaped quotes" do
tokens = subject.tokenize('text: "b\\\\", otherText: "a"')
assert_equal ['text', ':', 'b\\', 'otherText', ':', 'a',], tokens.map(&:value)
end
describe "block strings" do
      let(:query_string) { %|{ a(b: """\nc\n \\""" d\n""" """""e""""")}|}

      it "tokenizes them" do
        assert_equal "c\n \"\"\" d", tokens[5].value
        assert_equal "\"\"e\"\"", tokens[6].value
      end
it "tokenizes 10 quote edge case correctly" do
tokens = subject.tokenize('""""""""""')
assert_equal '""', tokens[0].value # first 8 quotes are a valid block string """"""""
assert_equal '', tokens[1].value # last 2 quotes are a valid string ""
end
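
      # Plain " characters inside a block string need no escaping: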
it "tokenizes with nested single quote strings correctly" do
tokens = subject.tokenize('"""{"x"}"""')
assert_equal '{"x"}', tokens[0].value
tokens = subject.tokenize('"""{"foo":"bar"}"""')
assert_equal '{"foo":"bar"}', tokens[0].value
end
it "tokenizes empty block strings correctly" do
empty_block_string = '""""""'
tokens = subject.tokenize(empty_block_string)
assert_equal '', tokens[0].value
end
it "tokenizes escaped backslashes at the end of blocks" do
tokens = subject.tokenize('text: """b\\\\""", otherText: "a"')
assert_equal ['text', ':', 'b\\', 'otherText', ':', 'a',], tokens.map(&:value)
end
end
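
    # In the single-quoted Ruby literal below, each \\x is the two-character
    # GraphQL escape \x, which the lexer unescapes to a single character: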
it "unescapes escaped characters" do
assert_equal "\" \\ / \b \f \n \r \t", subject.tokenize('"\\" \\\\ \\/ \\b \\f \\n \\r \\t"').first.to_s
end
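
    # Covers fixed-width \uXXXX escapes, braced \u{...} escapes, and a UTF-16
    # surrogate pair (\uD83D\uDCA9 encodes U+1F4A9, 💩):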
it "unescapes escaped unicode characters" do
assert_equal "\t", subject.tokenize('"\\u0009"').first.to_s
assert_equal "\t", subject.tokenize('"\\u{0009}"').first.to_s
assert_equal "𐘑", subject.tokenize('"\\u{10611}"').first.to_s
assert_equal "💩", subject.tokenize('"\\u{1F4A9}"').first.to_s
assert_equal "💩", subject.tokenize('"\\uD83D\\uDCA9"').first.to_s
end
it "accepts the full range of unicode" do
assert_equal "💩", subject.tokenize('"💩"').first.to_s
assert_equal "⌱", subject.tokenize('"⌱"').first.to_s
assert_equal "🂡\n🂢", subject.tokenize('"""🂡
🂢"""').first.to_s
end
it "doesn't accept unicode outside strings or comments" do
assert_equal :UNKNOWN_CHAR, GraphQL.scan('😘 ').first.name
end
it "rejects bad unicode, even when there's good unicode in the string" do
assert_equal :BAD_UNICODE_ESCAPE, subject.tokenize('"\\u0XXF \\u0009"').first.name
end
it "rejects truly invalid UTF-8 bytes" do
error_filename = "spec/support/parser/filename_example_invalid_utf8.graphql"
assert_equal :BAD_UNICODE_ESCAPE, subject.tokenize(File.read(error_filename)).first.name
end
it "rejects unicode that's well-formed but results in invalidly-encoded strings" do
# when the string here gets tokenized into an actual `:STRING`, it results in `valid_encoding?` being false for
# the ruby string so application code usually blows up trying to manipulate it
assert_equal :BAD_UNICODE_ESCAPE, subject.tokenize('"\\udc00\\udf2c"').first.name
assert_equal :BAD_UNICODE_ESCAPE, subject.tokenize('"\\u{dc00}\\u{df2c}"').first.name
end
it "clears the previous_token between runs" do
tok_2 = subject.tokenize(query_string)
assert_nil tok_2[0].prev_token
end
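
    # Token positions are 1-based: in `{ a(b: "c")}`, the opening quote of "c"
    # sits at column 8, and inspect renders positions as [line:col].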
it "counts string position properly" do
tokens = subject.tokenize('{ a(b: "c")}')
str_token = tokens[5]
assert_equal :STRING, str_token.name
assert_equal "c", str_token.value
assert_equal 8, str_token.col
assert_equal '(STRING "c" [1:8])', str_token.inspect
rparen_token = tokens[6]
assert_equal '(RPAREN ")" [1:11])', rparen_token.inspect
end
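
    # A block string token reports the line of its opening """; the tokens after
    # it account for every line the block string spanned.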
it "counts block string line properly" do
str = <<-GRAPHQL
"""
Here is a
multiline description
"""
type Query {
a: B
}
"Here's another description"
type B {
a: B
}
"""
And another
multiline description
"""
type C {
a: B
}
GRAPHQL
tokens = subject.tokenize(str)
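
      # Destructure the token stream; underscore-prefixed names are punctuation
      # and field tokens this test doesn't assert on.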
      string_tok, type_keyword_tok, query_name_tok,
        _curly, _ident, _colon, _ident, _curly,
        string_tok_2, type_keyword_tok_2, b_name_tok,
        _curly, _ident, _colon, _ident, _curly,
        string_tok_3, type_keyword_tok_3, c_name_tok = tokens

      assert_equal 1, string_tok.line
      assert_equal 5, type_keyword_tok.line
      assert_equal 5, query_name_tok.line

      # Make sure it handles the blank lines, too
      assert_equal 9, string_tok_2.line
      assert_equal 11, type_keyword_tok_2.line
      assert_equal 11, b_name_tok.line

      assert_equal 15, string_tok_3.line
      assert_equal 21, type_keyword_tok_3.line
      assert_equal 21, c_name_tok.line
    end
  end
end