Skip to content

Commit

Permalink
Use signed ints for ATN serialization not uint16, except for java (#3591
Browse files Browse the repository at this point in the history
)

* refactor serialize so we don't need comments

* more cleanup during refactor

* store language in serializer obj

* A lexer rule token type should never be -1 (EOF). 0 is fragment but then must be > 0.

* Go uses int not uint16 for ATN now. java/go/python3 pass

* remove checks for 0xFFFF in Go.

* C++ uint16_t to int for ATN.

* add mac php dir; fix type on accept() for generated code to be mixed.

* Add test from @KvanTTT. This PR fixes #3555 for non-Java targets.

* cleanup and add big lexer from #3546

* increase mvn mem size to 2G

* increase mvn mem size to 8G

* turn off the big ATN lexer test as we have memory issues during testing.

* Fixes #3592

* Revert "C++ uint16_t to int for ATN."

This reverts commit 4d2ebbf.

# Conflicts:
#	runtime/Cpp/runtime/src/atn/ATNSerializer.cpp
#	runtime/Cpp/runtime/src/tree/xpath/XPathLexer.cpp

* C++ uint16_t to int32_t for ATN.

* rm unnecessary include file, updating project file. get rid of the 0xFFFF checks in the C++ deserialization

* rm refs to 0xFFFF in swift

* javascript tests were running as Node...added to ignore list.

* don't distinguish between 16 and 32 bit char sets in serialization; Python2/3  updated to work with this change.

* update C++ to deserialize only 32-bit sets

* 0xFFFF -> -1 for C++ target.

* get other targets to use 32-bit sets in serialization. tests pass locally.

* refactor to reduce code size

* add comment

* oops. comment out call to writeSerializedATNIntegerHistogram(). I wonder if this is why it ran out of memory during testing?

* all but Java, Node, PHP, Go work now for the huge lexer file; I have set them to ignore.  note that the swift target takes over a minute to lex it.  I've turned off Node but it does not seem to terminate but it could terminate eventually.

* all but Java, Node, PHP, Go work now for the huge lexer file; I have set them to ignore.  note that the swift target takes over a minute to lex it.  I've turned off Node but it does not seem to terminate but it could terminate eventually.

* Turn off this big lexer because we get memory errors during continuous integration

* Intermediate commit where I have shuffled around all of the -1 flipping and bumping by two. Work still needs to be done because the token stream rewriter stuff fails, and I assume the other decoding for human-readability testing doesn't work either.

* convert decode to use int[]; remove dead code. don't use serializeAsChar stuff. more tests pass.

* more tests passing. simplify. When copying atn, must run ATN through serializer to set some state flags.

* 0xFFFD+ are not valid char

* clean up. tests passing now

* huge clean up. Got Java working with 32-bit ATNs! Still working on cleanup but I want to run the tests

* Cleanup the hack I did earlier; everything still seems to work

* Use linux DCO not our old contributors certificate of origin

* remove bump-by-2 code

* clean up per @KvanTTT. Can't test locally on this box. Will see what CI says.

* tweak comment

* Revert "Use linux DCO not our old contributors certificate of origin"

This reverts commit b0f8551.

* see if C++ works in CI for huge ATN
  • Loading branch information
parrt committed Mar 26, 2022
1 parent 44387ad commit 374c361
Show file tree
Hide file tree
Showing 76 changed files with 984 additions and 1,691 deletions.
1 change: 1 addition & 0 deletions .circleci/scripts/run-tests-cpp.sh
Expand Up @@ -3,5 +3,6 @@
set -euo pipefail

pushd runtime-testsuite
export MAVEN_OPTS="-Xmx8g"
mvn -Dparallel=classes -DthreadCount=4 -Dtest=cpp.** test
popd
1 change: 1 addition & 0 deletions .circleci/scripts/run-tests-dart.sh
Expand Up @@ -6,5 +6,6 @@ dart --version

pushd runtime-testsuite
echo "running maven tests..."
export MAVEN_OPTS="-Xmx8g"
mvn -Dparallel=classes -DthreadCount=4 -Dtest=dart.** test
popd
1 change: 1 addition & 0 deletions .circleci/scripts/run-tests-dotnet.sh
Expand Up @@ -3,5 +3,6 @@
set -euo pipefail

pushd runtime-testsuite
export MAVEN_OPTS="-Xmx8g"
mvn -Dparallel=classes -DthreadCount=4 -Dtest=csharp.** test
popd
1 change: 1 addition & 0 deletions .circleci/scripts/run-tests-go.sh
Expand Up @@ -6,5 +6,6 @@ go version

pushd runtime-testsuite
echo "running maven tests..."
export MAVEN_OPTS="-Xmx8g"
mvn -Dparallel=classes -DthreadCount=4 -Dtest=go.** test
popd
1 change: 1 addition & 0 deletions .circleci/scripts/run-tests-javascript.sh
Expand Up @@ -15,6 +15,7 @@ popd
pushd runtime-testsuite

echo "running maven tests..."
export MAVEN_OPTS="-Xmx8g"
mvn -Dtest=javascript.** test
RESULT+=$?

Expand Down
1 change: 1 addition & 0 deletions .circleci/scripts/run-tests-php.sh
Expand Up @@ -7,5 +7,6 @@ php -v
php_path=$(which php)
pushd runtime-testsuite
echo "running maven tests..."
export MAVEN_OPTS="-Xmx8g"
mvn -DPHP_PATH="${php_path}" -Dparallel=classes -DthreadCount=4 -Dtest=php.** test
popd
1 change: 1 addition & 0 deletions .circleci/scripts/run-tests-python2.sh
Expand Up @@ -17,5 +17,6 @@ python2 --version

pushd runtime-testsuite
echo "running maven tests..."
export MAVEN_OPTS="-Xmx8g"
mvn -Dparallel=classes -DthreadCount=4 -Dtest=python2.** test
popd
1 change: 1 addition & 0 deletions .circleci/scripts/run-tests-python3.sh
Expand Up @@ -17,5 +17,6 @@ python3 --version

pushd runtime-testsuite
echo "running maven tests..."
export MAVEN_OPTS="-Xmx8g"
mvn -Dparallel=classes -DthreadCount=4 -Dtest=python3.** test
popd
1 change: 1 addition & 0 deletions .circleci/scripts/run-tests-swift.sh
Expand Up @@ -17,5 +17,6 @@ set -euo pipefail

pushd runtime-testsuite
echo "running maven tests..."
export MAVEN_OPTS="-Xmx8g"
mvn -Dparallel=classes -DthreadCount=4 -Dtest=swift.** test
popd
1 change: 1 addition & 0 deletions .github/scripts-macosx/run-tests-cpp.sh
Expand Up @@ -3,5 +3,6 @@
set -euo pipefail

pushd runtime-testsuite
export MAVEN_OPTS="-Xmx8g"
mvn -Dparallel=classes -DthreadCount=4 -Dtest=cpp.** test
popd
1 change: 1 addition & 0 deletions .github/scripts-macosx/run-tests-dotnet.sh
Expand Up @@ -13,5 +13,6 @@ dotnet build -c Release -f netstandard2.0 runtime/CSharp/Antlr4.csproj

# run tests
pushd runtime-testsuite/
export MAVEN_OPTS="-Xmx8g"
mvn -Dparallel=classes -DthreadCount=4 -Dtest=csharp.** test
popd
1 change: 1 addition & 0 deletions .github/scripts-macosx/run-tests-swift.sh
Expand Up @@ -38,6 +38,7 @@ swift build --version
cd runtime-testsuite/
# mvn -e -Dparallel=classes -DthreadCount=4 -Dtest=swift.** test
# I don't know swift enough to make it parallel. revert to single threaded
export MAVEN_OPTS="-Xmx8g"
mvn -e -Dtest=swift.** test
rc=$?
cat target/surefire-reports/*.dumpstream || true
Expand Down
1 change: 1 addition & 0 deletions .github/scripts-windows/run-tests-csharp.cmd
@@ -1,5 +1,6 @@
rem Build and pack the C# runtime, then run the C# runtime tests via Maven.
dotnet build runtime/CSharp/src/Antlr4.csproj -c Release
dotnet pack runtime/CSharp/src/Antlr4.csproj -c Release
cd runtime-testsuite
rem cmd.exe has no `export` command; `set` is what actually defines MAVEN_OPTS (-Xmx8g = 8 GB max heap).
set "MAVEN_OPTS=-Xmx8g"
mvn -Dparallel=classes -DthreadCount=2 -Dtest=csharp.** test
cd ..
1 change: 1 addition & 0 deletions .github/scripts-windows/run-tests-dart.cmd
@@ -1,5 +1,6 @@
rem Install the Dart SDK with Chocolatey, then run the Dart runtime tests via Maven.
C:\ProgramData\chocolatey\bin\choco.exe -y install dart-sdk

cd runtime-testsuite
rem cmd.exe has no `export` command; `set` is what actually defines MAVEN_OPTS (-Xmx8g = 8 GB max heap).
set "MAVEN_OPTS=-Xmx8g"
mvn -Dtest=dart.** test -Dantlr-dart-dart="C:\tools\dart-sdk\bin\dart.exe" -Dantlr-dart-pub="C:\tools\dart-sdk\bin\pub.bat" -Dantlr-dart-dart2native="C:\tools\dart-sdk\bin\dart2native.bat"
cd ..
1 change: 1 addition & 0 deletions .github/scripts-windows/run-tests-go.cmd
@@ -1,3 +1,4 @@
rem Run the Go runtime tests via Maven.
cd runtime-testsuite
rem cmd.exe has no `export` command; `set` is what actually defines MAVEN_OPTS (-Xmx8g = 8 GB max heap).
set "MAVEN_OPTS=-Xmx8g"
mvn -Dparallel=classes -DthreadCount=2 -Dtest=go.** test
cd ..
1 change: 1 addition & 0 deletions .github/scripts-windows/run-tests-java.cmd
@@ -1,3 +1,4 @@
rem Run the Java runtime tests via Maven.
cd runtime-testsuite
rem cmd.exe has no `export` command; `set` is what actually defines MAVEN_OPTS (-Xmx8g = 8 GB max heap).
set "MAVEN_OPTS=-Xmx8g"
mvn -Dparallel=classes -DthreadCount=2 -Dtest=java.** test
cd ..
1 change: 1 addition & 0 deletions .github/scripts-windows/run-tests-javascript.cmd
@@ -1,3 +1,4 @@
rem Run the JavaScript runtime tests via Maven.
cd runtime-testsuite
rem cmd.exe has no `export` command; `set` is what actually defines MAVEN_OPTS (-Xmx8g = 8 GB max heap).
set "MAVEN_OPTS=-Xmx8g"
mvn -Dparallel=classes -DthreadCount=2 -Dtest=javascript.** test
cd ..
1 change: 1 addition & 0 deletions .github/scripts-windows/run-tests-php.cmd
Expand Up @@ -4,5 +4,6 @@ git clone https://github.com/antlr/antlr-php-runtime.git
move antlr-php-runtime runtime\PHP

rem Run the PHP runtime tests via Maven.
cd runtime-testsuite
rem cmd.exe has no `export` command; `set` is what actually defines MAVEN_OPTS (-Xmx8g = 8 GB max heap).
set "MAVEN_OPTS=-Xmx8g"
mvn -Dparallel=classes -DthreadCount=2 -Dtest=php.** test -Dantlr-php-php="C:\tools\php81\php.exe"
cd ..
1 change: 1 addition & 0 deletions .github/scripts-windows/run-tests-python2.cmd
@@ -1,3 +1,4 @@
rem Run the Python 2 runtime tests via Maven.
cd runtime-testsuite
rem cmd.exe has no `export` command; `set` is what actually defines MAVEN_OPTS (-Xmx8g = 8 GB max heap).
set "MAVEN_OPTS=-Xmx8g"
mvn -Dparallel=classes -DthreadCount=2 -Dantlr-python2-python="C:\Python27\python.exe" -Dtest=python2.** test
cd ..
1 change: 1 addition & 0 deletions .github/scripts-windows/run-tests-python3.cmd
@@ -1,3 +1,4 @@
rem Run the Python 3 runtime tests via Maven.
cd runtime-testsuite
rem cmd.exe has no `export` command; `set` is what actually defines MAVEN_OPTS (-Xmx8g = 8 GB max heap).
set "MAVEN_OPTS=-Xmx8g"
mvn -Dparallel=classes -DthreadCount=2 -Dantlr-python3-python="C:\Python310\python.exe" -Dtest=python3.** test
cd ..
2 changes: 2 additions & 0 deletions runtime-testsuite/pom.xml
Expand Up @@ -132,6 +132,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<release>8</release>
<source>9</source>
<target>9</target>
</configuration>
</plugin>
</plugins>
Expand Down
@@ -0,0 +1,13 @@
[type]
Lexer

[grammar]
lexer grammar L;
T_FFFF: 'FFFF' -> type(65535);

[input]
FFFF

[output]
[@0,0:3='FFFF',<65535>,1:0]
[@1,4:3='<EOF>',<-1>,1:4]
Expand Up @@ -6,7 +6,7 @@ Lexer

[grammar]
lexer grammar L;
ID : ([A-Z_]|'Ā'..'') ([A-Z_0-9]|'Ā'..'')*;
ID : ([A-Z_]|'Ā'..'\uFFFC') ([A-Z_0-9]|'Ā'..'\uFFFC')*; // FFFD+ are not valid char

[input]
Expand Down
Expand Up @@ -366,9 +366,10 @@ public static RuntimeTestDescriptor[] getRuntimeTestDescriptors(String group, St
}

if (group.equals("LexerExec")) {
descriptors.add(GeneratedLexerDescriptors.getLineSeparatorLfTest(targetName));
descriptors.add(GeneratedLexerDescriptors.getLineSeparatorCrLfTest(targetName));
descriptors.add(GeneratedLexerDescriptors.getLineSeparatorLfDescriptor(targetName));
descriptors.add(GeneratedLexerDescriptors.getLineSeparatorCrLfDescriptor(targetName));
descriptors.add(GeneratedLexerDescriptors.getLargeLexerDescriptor(targetName));
descriptors.add(GeneratedLexerDescriptors.getAtnStatesSizeMoreThan65535Descriptor(targetName));
}

return descriptors.toArray(new RuntimeTestDescriptor[0]);
Expand Down
Expand Up @@ -6,6 +6,7 @@
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNDeserializer;
import org.antlr.v4.runtime.atn.ATNSerializer;
import org.antlr.v4.runtime.misc.IntegerList;
import org.antlr.v4.semantics.SemanticPipeline;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LexerGrammar;
Expand Down Expand Up @@ -219,8 +220,9 @@ protected ATN createATN(Grammar g, boolean useSerializer) {

ATN atn = g.atn;
if ( useSerializer ) {
char[] serialized = ATNSerializer.getSerializedAsChars(atn, g.getLanguage());
return new ATNDeserializer().deserialize(serialized);
// sets some flags in ATN
IntegerList serialized = ATNSerializer.getSerialized(atn);
return new ATNDeserializer().deserialize(serialized.toArray());
}

return atn;
Expand Down
@@ -1,7 +1,9 @@
package org.antlr.v4.test.runtime;

import java.util.*;

public class GeneratedLexerDescriptors {
static RuntimeTestDescriptor getLineSeparatorLfTest(String targetName) {
static RuntimeTestDescriptor getLineSeparatorLfDescriptor(String targetName) {
UniversalRuntimeTestDescriptor result = new UniversalRuntimeTestDescriptor();
result.name = "LineSeparatorLf";
result.targetName = targetName;
Expand All @@ -20,7 +22,7 @@ static RuntimeTestDescriptor getLineSeparatorLfTest(String targetName) {
return result;
}

static RuntimeTestDescriptor getLineSeparatorCrLfTest(String targetName) {
static RuntimeTestDescriptor getLineSeparatorCrLfDescriptor(String targetName) {
UniversalRuntimeTestDescriptor result = new UniversalRuntimeTestDescriptor();
result.name = "LineSeparatorCrLf";
result.targetName = targetName;
Expand Down Expand Up @@ -65,4 +67,64 @@ static RuntimeTestDescriptor getLargeLexerDescriptor(String targetName) {
"[@1,5:4='<EOF>',<-1>,1:5]\n";
return result;
}

/**
 * Builds the "AtnStatesSizeMoreThan65535" lexer test descriptor.
 *
 * The generated grammar {@code L} contains {@code tokensCount} literal rules
 * {@code T_000000 .. T_001023}, each matching its own name followed by 70
 * underscores, plus a whitespace-skipping {@code WS} rule. Judging by the test
 * name, the long literals are meant to push the lexer ATN past 65535 states.
 * The input lists every literal on its own line, and the expected output lists
 * one token per line followed by EOF.
 *
 * @param targetName name of the target runtime the descriptor is created for
 * @return the populated descriptor; most targets are added to its skip list
 */
static RuntimeTestDescriptor getAtnStatesSizeMoreThan65535Descriptor(String targetName) {
	UniversalRuntimeTestDescriptor result = new UniversalRuntimeTestDescriptor();
	result.name = "AtnStatesSizeMoreThan65535";
	result.notes = "Regression for https://github.com/antlr/antlr4/issues/1863";
	result.targetName = targetName;
	result.testType = "Lexer";

	// I tried playing around with different sizes and I think 1002 works for Go but 1003 does not;
	// the executing lexer gets a token syntax error for T208 or something like that
	final int tokensCount = 1024;
	final String suffix = String.join("", Collections.nCopies(70, "_"));

	String grammarName = "L";
	StringBuilder grammar = new StringBuilder();
	grammar.append("lexer grammar ").append(grammarName).append(";\n");
	grammar.append('\n');
	StringBuilder input = new StringBuilder();
	StringBuilder output = new StringBuilder();
	int startOffset;
	// Starts at -2 so that the first token's start offset (stopOffset + 2) is 0.
	int stopOffset = -2;
	for (int i = 0; i < tokensCount; i++) {
		// Locale.ROOT keeps the digits ASCII regardless of the JVM's default locale,
		// so the rule names are always valid grammar identifiers.
		String ruleName = String.format(Locale.ROOT, "T_%06d", i);
		String value = ruleName + suffix;
		grammar.append(ruleName).append(": '").append(value).append("';\n");
		input.append(value).append('\n');

		// Each literal is followed by one '\n', so the next token starts two
		// characters after the previous token's stop offset.
		startOffset = stopOffset + 2;
		stopOffset += value.length() + 1;

		// Token type and line number are both i + 1: rules are numbered from 1 in
		// declaration order and every literal sits on its own input line.
		output.append("[@").append(i).append(',').append(startOffset).append(':').append(stopOffset)
				.append("='").append(value).append("',<").append(i + 1).append(">,").append(i + 1)
				.append(":0]\n");
	}

	grammar.append("\n");
	grammar.append("WS: [ \\t\\r\\n]+ -> skip;\n");

	// EOF token: empty range positioned right after the final newline.
	startOffset = stopOffset + 2;
	stopOffset = startOffset - 1;
	output.append("[@").append(tokensCount).append(',').append(startOffset).append(':').append(stopOffset)
			.append("='<EOF>',<-1>,").append(tokensCount + 1).append(":0]\n");

	result.grammar = grammar.toString();
	result.grammarName = grammarName;
	result.input = input.toString();
	result.output = output.toString();

	// We seem to get memory errors and so I am turning this off during CI.
	// Of the full target list (CSharp, Python2, Python3, Cpp, Go, PHP, Swift, Java,
	// JavaScript, Node, Dart) only Cpp and Java are left enabled.
	// Failures noted for individual targets:
	//   Node - doesn't terminate
	//   PHP  - "Allowed memory size of 134217728 bytes exhausted (tried to allocate 16384 bytes)..."
	//   Go   - syntax error
	List<String> all = Arrays.asList(
			"CSharp", "Python2", "Python3", "Go", "PHP", "Swift", "JavaScript", "Node", "Dart"
	);
	result.skipTargets.addAll(all);

	return result;
}
}
Expand Up @@ -6,20 +6,18 @@
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNDeserializer;
import org.antlr.v4.runtime.atn.ATNSerializer;
import org.antlr.v4.runtime.misc.IntegerList;
import org.antlr.v4.runtime.misc.InterpreterDataReader;
import org.antlr.v4.runtime.misc.Utils;
import org.antlr.v4.tool.Grammar;
import org.junit.Assert;
import org.junit.Test;

import java.io.IOException;
import java.lang.reflect.Field;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/** This file represents a simple sanity checks on the parsing of the .interp file
Expand Down Expand Up @@ -76,8 +74,8 @@ public void testParseFile() throws IOException, NoSuchFieldException, IllegalAcc
Assert.assertNull(channels);
Assert.assertNull(modes);

char[] atnChars = ATNSerializer.getSerializedAsChars(atn, g.getLanguage());
Assert.assertEquals(ATNDeserializer.SERIALIZED_VERSION, atnChars[0]);
IntegerList serialized = ATNSerializer.getSerialized(atn);
Assert.assertEquals(ATNDeserializer.SERIALIZED_VERSION, serialized.get(0));
}

private <T> List<T> castList(Object obj, Class<T> clazz) {
Expand Down
Expand Up @@ -55,7 +55,7 @@
*
* Sample output on OS X with 4 GHz Intel Core i7 (us == microseconds, 1/1000 of a millisecond):
*
Java VM args: -Xms2G -Xmx2G
Java VM args: -Xms2G -Xmx8g
Warming up Java compiler....
load_legacy_java_ascii_file average time 53us size 58384b over 3500 loads of 29038 symbols from Parser.java
load_legacy_java_ascii_file average time 27us size 15568b over 3500 loads of 7625 symbols from RuleContext.java
Expand Down
Expand Up @@ -244,7 +244,7 @@ private String locateTool(String tool) {
return phpPath;
}

String[] roots = {"/usr/local/bin/", "/opt/local/bin", "/usr/bin/"};
String[] roots = {"/usr/local/bin/", "/opt/local/bin", "/opt/homebrew/bin/", "/usr/bin/"};

for (String root: roots) {
if (new File(root + tool).exists()) {
Expand Down
23 changes: 3 additions & 20 deletions runtime/CSharp/src/Atn/ATNDeserializer.cs
Expand Up @@ -45,8 +45,7 @@ public virtual ATN Deserialize(int[] data)
ReadRules (atn);
ReadModes (atn);
IList<IntervalSet> sets = new List<IntervalSet>();
ReadSets (atn, sets, ReadInt);
ReadSets (atn, sets, ReadInt32);
ReadSets (atn, sets);
ReadEdges (atn, sets);
ReadDecisions (atn);
ReadLexerActions (atn);
Expand Down Expand Up @@ -190,15 +189,7 @@ protected internal virtual void ReadLexerActions(ATN atn)
{
LexerActionType actionType = (LexerActionType)ReadInt();
int data1 = ReadInt();
if (data1 == unchecked((int)(0xFFFF)))
{
data1 = -1;
}
int data2 = ReadInt();
if (data2 == unchecked((int)(0xFFFF)))
{
data2 = -1;
}
ILexerAction lexerAction = LexerActionFactory(actionType, data1, data2);
atn.lexerActions[i_10] = lexerAction;
}
Expand Down Expand Up @@ -309,7 +300,7 @@ protected internal virtual void ReadEdges(ATN atn, IList<IntervalSet> sets)
}
}

protected internal virtual void ReadSets(ATN atn, IList<IntervalSet> sets, System.Func<int> readUnicode)
protected internal virtual void ReadSets(ATN atn, IList<IntervalSet> sets)
{
//
// SETS
Expand All @@ -327,7 +318,7 @@ protected internal virtual void ReadSets(ATN atn, IList<IntervalSet> sets, Syste
}
for (int j = 0; j < nintervals; j++)
{
set.Add(readUnicode(), readUnicode());
set.Add(ReadInt(), ReadInt());
}
}
}
Expand Down Expand Up @@ -369,9 +360,6 @@ protected internal virtual void ReadRules(ATN atn)
atn.ruleToStartState[i_5] = startState;
if (atn.grammarType == ATNType.Lexer) {
int tokenType = ReadInt ();
if (tokenType == unchecked((int)(0xFFFF))) {
tokenType = TokenConstants.EOF;
}
atn.ruleToTokenType [i_5] = tokenType;
}
}
Expand Down Expand Up @@ -967,11 +955,6 @@ protected internal int ReadInt()
return data[p++];
}

protected internal int ReadInt32()
{
return (int)data[p++] | ((int)data[p++] << 16);
}

[return: NotNull]
protected internal virtual Transition EdgeFactory(ATN atn, TransitionType type, int src, int trg, int arg1, int arg2, int arg3, IList<IntervalSet> sets)
{
Expand Down

0 comments on commit 374c361

Please sign in to comment.