forked from antlr/antlr4
/
GeneratedLexerDescriptors.java
123 lines (107 loc) · 4.42 KB
/
GeneratedLexerDescriptors.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package org.antlr.v4.test.runtime;
import java.util.*;
/**
 * Static factory methods that build lexer test descriptors in code.
 *
 * <p>Each method fills a {@link UniversalRuntimeTestDescriptor} with a lexer
 * grammar, an input string and the token listing stored in {@code output}.
 * Two of the descriptors generate their grammar (and, for one of them, the
 * input and output as well) in a loop because they are too large to write
 * out as literals.
 */
public class GeneratedLexerDescriptors {
    /** Value stored in {@code testType} by every descriptor in this class. */
    private static final String LEXER_TEST_TYPE = "Lexer";

    /**
     * Creates a descriptor with the fields shared by every factory method
     * in this class already filled in.
     *
     * @param name       value for the descriptor's {@code name} field
     * @param targetName value for the descriptor's {@code targetName} field
     * @return a new descriptor whose {@code testType} is {@code "Lexer"}
     */
    private static UniversalRuntimeTestDescriptor newLexerDescriptor(String name, String targetName) {
        UniversalRuntimeTestDescriptor result = new UniversalRuntimeTestDescriptor();
        result.name = name;
        result.targetName = targetName;
        result.testType = LEXER_TEST_TYPE;
        return result;
    }

    /**
     * Builds the "LineSeparatorLf" descriptor: a grammar that splits the
     * input on LF characters, with an output listing in which the line
     * number advances after each LF token.
     *
     * @param targetName value stored in the descriptor's {@code targetName} field
     * @return the populated descriptor
     */
    static RuntimeTestDescriptor getLineSeparatorLfDescriptor(String targetName) {
        UniversalRuntimeTestDescriptor result = newLexerDescriptor("LineSeparatorLf", targetName);
        result.grammar = "lexer grammar L;\n" +
                "T: ~'\\n'+;\n" +
                "SEPARATOR: '\\n';";
        result.grammarName = "L";
        result.input = "1\n2\n3";
        result.output = "[@0,0:0='1',<1>,1:0]\n" +
                "[@1,1:1='\\n',<2>,1:1]\n" +
                "[@2,2:2='2',<1>,2:0]\n" +
                "[@3,3:3='\\n',<2>,2:1]\n" +
                "[@4,4:4='3',<1>,3:0]\n" +
                "[@5,5:4='<EOF>',<-1>,3:1]\n";
        return result;
    }

    /**
     * Builds the "LineSeparatorCrLf" descriptor: the CR LF counterpart of
     * {@link #getLineSeparatorLfDescriptor(String)}, where each separator
     * token spans two characters.
     *
     * @param targetName value stored in the descriptor's {@code targetName} field
     * @return the populated descriptor
     */
    static RuntimeTestDescriptor getLineSeparatorCrLfDescriptor(String targetName) {
        UniversalRuntimeTestDescriptor result = newLexerDescriptor("LineSeparatorCrLf", targetName);
        result.grammar = "lexer grammar L;\n" +
                "T: ~'\\r'+;\n" +
                "SEPARATOR: '\\r\\n';";
        result.grammarName = "L";
        result.input = "1\r\n2\r\n3";
        result.output = "[@0,0:0='1',<1>,1:0]\n" +
                "[@1,1:2='\\r\\n',<2>,1:1]\n" +
                "[@2,3:3='2',<1>,2:0]\n" +
                "[@3,4:5='\\r\\n',<2>,2:1]\n" +
                "[@4,6:6='3',<1>,3:0]\n" +
                "[@5,7:6='<EOF>',<-1>,3:1]\n";
        return result;
    }

    /**
     * Builds the "LargeLexer" descriptor: a grammar with a whitespace rule
     * followed by 4000 keyword rules {@code KW0 .. KW3999}, lexing the
     * single keyword {@code KW400}.
     *
     * @param targetName value stored in the descriptor's {@code targetName} field
     * @return the populated descriptor
     */
    static RuntimeTestDescriptor getLargeLexerDescriptor(String targetName) {
        UniversalRuntimeTestDescriptor result = newLexerDescriptor("LargeLexer", targetName);
        result.notes = "This is a regression test for antlr/antlr4#76 \"Serialized ATN strings\n" +
                "should be split when longer than 2^16 bytes (class file limitation)\"\n" +
                "https://github.com/antlr/antlr4/issues/76";

        final int tokensCount = 4000;
        final String grammarName = "L";

        StringBuilder grammar = new StringBuilder();
        grammar.append("lexer grammar ").append(grammarName).append(";\n");
        grammar.append("WS: [ \\t\\r\\n]+ -> skip;\n");
        for (int i = 0; i < tokensCount; i++) {
            grammar.append("KW").append(i).append(" : 'KW' '").append(i).append("';\n");
        }

        result.grammar = grammar.toString();
        result.grammarName = grammarName;
        result.input = "KW400";
        // Token type 402 presumably follows from rule order: WS first, then
        // KW0, KW1, ... so KW400 is the 402nd rule -- confirm against the tool.
        result.output = "[@0,0:4='KW400',<402>,1:0]\n" +
                "[@1,5:4='<EOF>',<-1>,1:5]\n";
        return result;
    }

    /**
     * Builds the "AtnStatesSizeMoreThan65535" descriptor: 1024 rules, each
     * matching one 78-character literal ({@code T_nnnnnn} followed by 70
     * underscores), with an input holding every literal on its own line and
     * the matching token listing.
     *
     * <p>The descriptor is currently added to the skip list of every target
     * named in the method body.
     *
     * @param targetName value stored in the descriptor's {@code targetName} field
     * @return the populated descriptor
     */
    static RuntimeTestDescriptor getAtnStatesSizeMoreThan65535Descriptor(String targetName) {
        UniversalRuntimeTestDescriptor result = newLexerDescriptor("AtnStatesSizeMoreThan65535", targetName);
        result.notes = "Regression for https://github.com/antlr/antlr4/issues/1863";

        final int tokensCount = 1024;
        final String suffix = String.join("", Collections.nCopies(70, "_"));
        final String grammarName = "L";

        StringBuilder grammar = new StringBuilder();
        grammar.append("lexer grammar ").append(grammarName).append(";\n");
        grammar.append('\n');
        StringBuilder input = new StringBuilder();
        StringBuilder output = new StringBuilder();

        // Offset in the input of the first character of the next token.
        int startOffset = 0;
        for (int i = 0; i < tokensCount; i++) {
            // Locale.ROOT keeps the digits ASCII; the default-locale overload
            // may emit locale-specific digits and corrupt the rule names.
            String ruleName = String.format(Locale.ROOT, "T_%06d", i);
            String value = ruleName + suffix;
            grammar.append(ruleName).append(": '").append(value).append("';\n");
            input.append(value).append('\n');

            int stopOffset = startOffset + value.length() - 1;
            output.append("[@").append(i).append(',').append(startOffset).append(':').append(stopOffset)
                    .append("='").append(value).append("',<").append(i + 1).append(">,").append(i + 1)
                    .append(":0]\n");

            // Step over the last character of the token and the '\n' after it.
            startOffset = stopOffset + 2;
        }

        grammar.append("\n");
        grammar.append("WS: [ \\t\\r\\n]+ -> skip;\n");

        // The EOF entry is written with stop = start - 1, after the final '\n'.
        output.append("[@").append(tokensCount).append(',').append(startOffset).append(':').append(startOffset - 1)
                .append("='<EOF>',<-1>,").append(tokensCount + 1).append(":0]\n");

        result.grammar = grammar.toString();
        result.grammarName = grammarName;
        result.input = input.toString();
        result.output = output.toString();

        // Skipped for all of the targets below. Reasons noted earlier for
        // individual targets: Java can't handle > 16bit states yet,
        // JavaScript doesn't terminate, Go reports a syntax error.
        List<String> all = Arrays.asList("CSharp", "Python2", "Python3", "Cpp", "Go", "PHP", "Swift", "Java", "JavaScript", "Dart");
        result.skipTargets.addAll(all);
        return result;
    }
}