From 374c361e3744dd2928db7f771bc7051e0125dcd4 Mon Sep 17 00:00:00 2001
From: Terence Parr <parrt@antlr.org>
Date: Sat, 26 Mar 2022 13:30:58 -0700
Subject: [PATCH] Use signed ints for ATN serialization not uint16, except for
 java (#3591)

* refactor serialize so we don't need comments

* more cleanup during refactor

* store language in serializer obj

* A lexer rule token type should never be -1 (EOF). 0 is fragment but then must be > 0.

* Go uses int not uint16 for ATN now. java/go/python3 pass

* remove checks for 0xFFFF in Go.

* C++ uint16_t to int for ATN.

* add mac php dir; fix type on accept() for generated code to be mixed.

* Add test from @kvanTTT. This PR fixes https://github.com/antlr/antlr4/issues/3555 for non-Java targets.

* cleanup and add big lexer from https://github.com/antlr/antlr4/pull/3546

* increase mvn mem size to 2G

* increase mvn mem size to 8G

* turn off the big ATN lexer test as we have memory issues during testing.

* Fixes #3592

* Revert "C++ uint16_t to int for ATN."

This reverts commit 4d2ebbf5671a5b373d2ca3b5a05464ccb8b71b52.

# Conflicts:
#	runtime/Cpp/runtime/src/atn/ATNSerializer.cpp
#	runtime/Cpp/runtime/src/tree/xpath/XPathLexer.cpp

* C++ uint16_t to int32_t for ATN.

* rm unnecessary include file, updating project file. get rid of the 0xFFFF checks in the C++ deserialization

* rm refs to 0xFFFF in swift

* javascript tests were running as Node...added to ignore list.

* don't distinguish between 16 and 32 bit char sets in serialization; Python2/3  updated to work with this change.

* update C++ to deserialize only 32-bit sets

* 0xFFFF -> -1 for C++ target.

* get other targets to use 32-bit sets in serialization. tests pass locally.

* refactor to reduce code size

* add comment

* oops. comment out call to writeSerializedATNIntegerHistogram(). I wonder if this is why it ran out of memory during testing?

* all but Java, Node, PHP, Go work now for the huge lexer file; I have set them to ignore.  note that the swift target takes over a minute to lex it.  I've turned off Node but it does not seem to terminate but it could terminate eventually.

* all but Java, Node, PHP, Go work now for the huge lexer file; I have set them to ignore.  note that the swift target takes over a minute to lex it.  I've turned off Node but it does not seem to terminate but it could terminate eventually.

* Turn off this big lexer because we get memory errors during continuous integration

* Intermediate commit where I have shuffled around all of the -1 flipping and bumping by two. Work still needs to be done because the token stream rewriter stuff fails, and I assume the other decoding for human-readability testing doesn't work either.

* convert decode to use int[]; remove dead code. don't use serializeAsChar stuff. more tests pass.

* more tests passing. simplify. When copying atn, must run ATN through serializer to set some state flags.

* 0xFFFD+ are not valid char

* clean up. tests passing now

* huge clean up. Got Java working with 32-bit ATNs! Still working on cleanup but I want to run the tests

* Cleanup the hack I did earlier; everything still seems to work

* Use linux DCO not our old contributors certificate of origin

* remove bump-by-2 code

* clean up per @kvanTTT. Can't test locally on this box. Will see what CI says.

* tweak comment

* Revert "Use linux DCO not our old contributors certificate of origin"

This reverts commit b0f8551c9a674a0a1e045b9a710800df28e72c10.

* see if C++ works in CI for huge ATN
---
 .circleci/scripts/run-tests-cpp.sh            |   1 +
 .circleci/scripts/run-tests-dart.sh           |   1 +
 .circleci/scripts/run-tests-dotnet.sh         |   1 +
 .circleci/scripts/run-tests-go.sh             |   1 +
 .circleci/scripts/run-tests-javascript.sh     |   1 +
 .circleci/scripts/run-tests-php.sh            |   1 +
 .circleci/scripts/run-tests-python2.sh        |   1 +
 .circleci/scripts/run-tests-python3.sh        |   1 +
 .circleci/scripts/run-tests-swift.sh          |   1 +
 .github/scripts-macosx/run-tests-cpp.sh       |   1 +
 .github/scripts-macosx/run-tests-dotnet.sh    |   1 +
 .github/scripts-macosx/run-tests-swift.sh     |   1 +
 .github/scripts-windows/run-tests-csharp.cmd  |   1 +
 .github/scripts-windows/run-tests-dart.cmd    |   1 +
 .github/scripts-windows/run-tests-go.cmd      |   1 +
 .github/scripts-windows/run-tests-java.cmd    |   1 +
 .../scripts-windows/run-tests-javascript.cmd  |   1 +
 .github/scripts-windows/run-tests-php.cmd     |   1 +
 .github/scripts-windows/run-tests-python2.cmd |   1 +
 .github/scripts-windows/run-tests-python3.cmd |   1 +
 runtime-testsuite/pom.xml                     |   2 +
 .../descriptors/LexerExec/TokenType0xFFFF.txt |  13 +
 .../descriptors/LexerExec/UnicodeCharSet.txt  |   2 +-
 .../v4/test/runtime/BaseRuntimeTest.java      |   5 +-
 .../test/runtime/BaseRuntimeTestSupport.java  |   6 +-
 .../runtime/GeneratedLexerDescriptors.java    |  66 +-
 .../java/TestInterpreterDataReader.java       |   8 +-
 .../runtime/java/api/perf/TimeLexerSpeed.java |   2 +-
 .../v4/test/runtime/php/BasePHPTest.java      |   2 +-
 runtime/CSharp/src/Atn/ATNDeserializer.cs     |  23 +-
 runtime/Cpp/runtime/antlr4cpp-vs2013.vcxproj  |   2 -
 .../runtime/antlr4cpp-vs2013.vcxproj.filters  |   3 -
 runtime/Cpp/runtime/antlr4cpp-vs2015.vcxproj  |   1 -
 .../runtime/antlr4cpp-vs2015.vcxproj.filters  |   3 -
 runtime/Cpp/runtime/antlr4cpp-vs2017.vcxproj  |   1 -
 .../runtime/antlr4cpp-vs2017.vcxproj.filters  |   3 -
 runtime/Cpp/runtime/antlr4cpp-vs2019.vcxproj  |   1 -
 .../runtime/antlr4cpp-vs2019.vcxproj.filters  |   3 -
 runtime/Cpp/runtime/src/Parser.cpp            |   4 +-
 runtime/Cpp/runtime/src/Recognizer.h          |   2 +-
 runtime/Cpp/runtime/src/antlr4-runtime.h      |   1 -
 .../Cpp/runtime/src/atn/ATNDeserializer.cpp   |  49 +-
 runtime/Cpp/runtime/src/atn/ATNDeserializer.h |   2 +-
 runtime/Cpp/runtime/src/atn/ATNSerializer.cpp | 589 ------------------
 runtime/Cpp/runtime/src/atn/ATNSerializer.h   |  61 --
 .../src/misc/InterpreterDataReader.cpp        |   4 +-
 .../Cpp/runtime/src/tree/xpath/XPathLexer.cpp |  10 +-
 .../Cpp/runtime/src/tree/xpath/XPathLexer.h   |   2 +-
 .../lib/src/atn/src/atn_deserializer.dart     |  30 +-
 runtime/Go/antlr/atn_deserializer.go          |  41 +-
 runtime/Go/antlr/testing_lexer_b_test.go      |   4 +-
 runtime/Go/antlr/tokenstream_rewriter_test.go |   4 +-
 .../antlr/v4/runtime/atn/ATNDeserializer.java | 229 +++----
 .../antlr/v4/runtime/atn/ATNSerializer.java   | 571 +++++------------
 .../antlr/v4/runtime/atn/ATNSimulator.java    |  62 --
 .../antlr/v4/runtime/misc/IntegerList.java    |   5 +-
 .../runtime/misc/InterpreterDataReader.java   |  46 +-
 .../src/antlr4/atn/ATNDeserializer.js         |  30 +-
 .../Python2/src/antlr4/atn/ATNDeserializer.py |  21 +-
 .../Python3/src/antlr4/atn/ATNDeserializer.py |  26 +-
 .../Sources/Antlr4/atn/ATNDeserializer.swift  |  31 +-
 .../org/antlr/v4/test/tool/ATNDescriber.java  | 203 ++++++
 .../v4/test/tool/TestATNDeserialization.java  |  44 +-
 .../v4/test/tool/TestATNSerialization.java    | 268 ++++----
 ...rSupportTest.java => TestCharSupport.java} |   3 +-
 .../tool/TestUtils.java}                      |   7 +-
 .../v4/tool/templates/codegen/Cpp/Cpp.stg     |  14 +-
 .../antlr/v4/tool/templates/codegen/Go/Go.stg |  10 +-
 .../v4/tool/templates/codegen/Java/Java.stg   |   2 +-
 tool/src/org/antlr/v4/Tool.java               |   4 +-
 tool/src/org/antlr/v4/codegen/Target.java     |   4 +-
 .../antlr/v4/codegen/model/Recognizer.java    |  10 +-
 .../antlr/v4/codegen/model/SerializedATN.java |  33 +-
 .../v4/codegen/model/SerializedJavaATN.java   |  40 ++
 tool/src/org/antlr/v4/tool/Grammar.java       |  36 +-
 .../v4/tool/GrammarParserInterpreter.java     |   7 +-
 76 files changed, 984 insertions(+), 1691 deletions(-)
 create mode 100644 runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/LexerExec/TokenType0xFFFF.txt
 delete mode 100755 runtime/Cpp/runtime/src/atn/ATNSerializer.cpp
 delete mode 100755 runtime/Cpp/runtime/src/atn/ATNSerializer.h
 create mode 100644 tool-testsuite/test/org/antlr/v4/test/tool/ATNDescriber.java
 rename tool-testsuite/test/org/antlr/v4/test/tool/{CharSupportTest.java => TestCharSupport.java} (99%)
 rename tool-testsuite/test/org/antlr/v4/{misc/UtilsTest.java => test/tool/TestUtils.java} (97%)
 create mode 100644 tool/src/org/antlr/v4/codegen/model/SerializedJavaATN.java

diff --git a/.circleci/scripts/run-tests-cpp.sh b/.circleci/scripts/run-tests-cpp.sh
index 3d7e52835e..58b59ff283 100755
--- a/.circleci/scripts/run-tests-cpp.sh
+++ b/.circleci/scripts/run-tests-cpp.sh
@@ -3,5 +3,6 @@
 set -euo pipefail
 
 pushd runtime-testsuite
+export MAVEN_OPTS="-Xmx8g"
 mvn -Dparallel=classes -DthreadCount=4 -Dtest=cpp.** test
 popd
diff --git a/.circleci/scripts/run-tests-dart.sh b/.circleci/scripts/run-tests-dart.sh
index 4bd9f78ff0..2f9015df6c 100755
--- a/.circleci/scripts/run-tests-dart.sh
+++ b/.circleci/scripts/run-tests-dart.sh
@@ -6,5 +6,6 @@ dart --version
 
 pushd runtime-testsuite
   echo "running maven tests..."
+  export MAVEN_OPTS="-Xmx8g"
   mvn -Dparallel=classes -DthreadCount=4 -Dtest=dart.** test
 popd
diff --git a/.circleci/scripts/run-tests-dotnet.sh b/.circleci/scripts/run-tests-dotnet.sh
index cb237e6ab5..c0e6eeef6f 100755
--- a/.circleci/scripts/run-tests-dotnet.sh
+++ b/.circleci/scripts/run-tests-dotnet.sh
@@ -3,5 +3,6 @@
 set -euo pipefail
 
 pushd runtime-testsuite
+export MAVEN_OPTS="-Xmx8g"
 mvn -Dparallel=classes -DthreadCount=4 -Dtest=csharp.** test
 popd
diff --git a/.circleci/scripts/run-tests-go.sh b/.circleci/scripts/run-tests-go.sh
index d98b2b43fb..b5317f3e2a 100755
--- a/.circleci/scripts/run-tests-go.sh
+++ b/.circleci/scripts/run-tests-go.sh
@@ -6,5 +6,6 @@ go version
 
 pushd runtime-testsuite
     echo "running maven tests..."
+    export MAVEN_OPTS="-Xmx8g"
     mvn -Dparallel=classes -DthreadCount=4 -Dtest=go.** test
 popd
diff --git a/.circleci/scripts/run-tests-javascript.sh b/.circleci/scripts/run-tests-javascript.sh
index 4923eb4b9f..b0b461fce0 100755
--- a/.circleci/scripts/run-tests-javascript.sh
+++ b/.circleci/scripts/run-tests-javascript.sh
@@ -15,6 +15,7 @@ popd
 pushd runtime-testsuite
 
   echo "running maven tests..."
+  export MAVEN_OPTS="-Xmx8g"
   mvn -Dtest=javascript.** test
   RESULT+=$?
 
diff --git a/.circleci/scripts/run-tests-php.sh b/.circleci/scripts/run-tests-php.sh
index 92e4722bf5..e46ebb3d66 100755
--- a/.circleci/scripts/run-tests-php.sh
+++ b/.circleci/scripts/run-tests-php.sh
@@ -7,5 +7,6 @@ php -v
 php_path=$(which php)
 pushd runtime-testsuite
   echo "running maven tests..."
+  export MAVEN_OPTS="-Xmx8g"
   mvn -DPHP_PATH="${php_path}" -Dparallel=classes -DthreadCount=4 -Dtest=php.** test
 popd
diff --git a/.circleci/scripts/run-tests-python2.sh b/.circleci/scripts/run-tests-python2.sh
index e76f862ad1..772009ac3d 100755
--- a/.circleci/scripts/run-tests-python2.sh
+++ b/.circleci/scripts/run-tests-python2.sh
@@ -17,5 +17,6 @@ python2 --version
 
 pushd runtime-testsuite
   echo "running maven tests..."
+  export MAVEN_OPTS="-Xmx8g"
   mvn -Dparallel=classes -DthreadCount=4 -Dtest=python2.** test
 popd
\ No newline at end of file
diff --git a/.circleci/scripts/run-tests-python3.sh b/.circleci/scripts/run-tests-python3.sh
index c39b88b441..2be5773a1c 100755
--- a/.circleci/scripts/run-tests-python3.sh
+++ b/.circleci/scripts/run-tests-python3.sh
@@ -17,5 +17,6 @@ python3 --version
 
 pushd runtime-testsuite
   echo "running maven tests..."
+  export MAVEN_OPTS="-Xmx8g"
   mvn -Dparallel=classes -DthreadCount=4 -Dtest=python3.** test
 popd
diff --git a/.circleci/scripts/run-tests-swift.sh b/.circleci/scripts/run-tests-swift.sh
index 8c774a8bfc..bc68f84735 100755
--- a/.circleci/scripts/run-tests-swift.sh
+++ b/.circleci/scripts/run-tests-swift.sh
@@ -17,5 +17,6 @@ set -euo pipefail
 
 pushd runtime-testsuite
   echo "running maven tests..."
+  export MAVEN_OPTS="-Xmx8g"
   mvn -Dparallel=classes -DthreadCount=4 -Dtest=swift.** test
 popd
diff --git a/.github/scripts-macosx/run-tests-cpp.sh b/.github/scripts-macosx/run-tests-cpp.sh
index 3d7e52835e..58b59ff283 100755
--- a/.github/scripts-macosx/run-tests-cpp.sh
+++ b/.github/scripts-macosx/run-tests-cpp.sh
@@ -3,5 +3,6 @@
 set -euo pipefail
 
 pushd runtime-testsuite
+export MAVEN_OPTS="-Xmx8g"
 mvn -Dparallel=classes -DthreadCount=4 -Dtest=cpp.** test
 popd
diff --git a/.github/scripts-macosx/run-tests-dotnet.sh b/.github/scripts-macosx/run-tests-dotnet.sh
index d943c6e952..32788e1c85 100755
--- a/.github/scripts-macosx/run-tests-dotnet.sh
+++ b/.github/scripts-macosx/run-tests-dotnet.sh
@@ -13,5 +13,6 @@ dotnet build -c Release -f netstandard2.0 runtime/CSharp/Antlr4.csproj
 
 # run tests
 pushd runtime-testsuite/
+export MAVEN_OPTS="-Xmx8g"
 mvn -Dparallel=classes -DthreadCount=4 -Dtest=csharp.** test
 popd
diff --git a/.github/scripts-macosx/run-tests-swift.sh b/.github/scripts-macosx/run-tests-swift.sh
index 4d4532d1b5..a6d6470179 100755
--- a/.github/scripts-macosx/run-tests-swift.sh
+++ b/.github/scripts-macosx/run-tests-swift.sh
@@ -38,6 +38,7 @@ swift build --version
 cd runtime-testsuite/
 #  mvn -e -Dparallel=classes -DthreadCount=4 -Dtest=swift.** test
 # I don't know swift enough to make it parallel. revert to single threaded
+export MAVEN_OPTS="-Xmx8g"
 mvn -e -Dtest=swift.** test
 rc=$?
 cat target/surefire-reports/*.dumpstream || true
diff --git a/.github/scripts-windows/run-tests-csharp.cmd b/.github/scripts-windows/run-tests-csharp.cmd
index c5e1fb0a77..28fd0d3c99 100644
--- a/.github/scripts-windows/run-tests-csharp.cmd
+++ b/.github/scripts-windows/run-tests-csharp.cmd
@@ -1,5 +1,6 @@
 dotnet build runtime/CSharp/src/Antlr4.csproj -c Release
 dotnet pack runtime/CSharp/src/Antlr4.csproj -c Release
 cd runtime-testsuite
+set MAVEN_OPTS=-Xmx8g
 mvn -Dparallel=classes -DthreadCount=2 -Dtest=csharp.** test
 cd ..
diff --git a/.github/scripts-windows/run-tests-dart.cmd b/.github/scripts-windows/run-tests-dart.cmd
index 2fd5034ba7..74fa92b52d 100644
--- a/.github/scripts-windows/run-tests-dart.cmd
+++ b/.github/scripts-windows/run-tests-dart.cmd
@@ -1,5 +1,6 @@
 C:\ProgramData\chocolatey\bin\choco.exe -y install dart-sdk
 
 cd runtime-testsuite
+set MAVEN_OPTS=-Xmx8g
 mvn -Dtest=dart.** test -Dantlr-dart-dart="C:\tools\dart-sdk\bin\dart.exe" -Dantlr-dart-pub="C:\tools\dart-sdk\bin\pub.bat" -Dantlr-dart-dart2native="C:\tools\dart-sdk\bin\dart2native.bat"
 cd ..
diff --git a/.github/scripts-windows/run-tests-go.cmd b/.github/scripts-windows/run-tests-go.cmd
index 1c15fd1c95..d07d7e1ef2 100644
--- a/.github/scripts-windows/run-tests-go.cmd
+++ b/.github/scripts-windows/run-tests-go.cmd
@@ -1,3 +1,4 @@
 cd runtime-testsuite
+set MAVEN_OPTS=-Xmx8g
 mvn -Dparallel=classes -DthreadCount=2 -Dtest=go.** test
 cd ..
diff --git a/.github/scripts-windows/run-tests-java.cmd b/.github/scripts-windows/run-tests-java.cmd
index 55e9ea5621..87a72b3c72 100755
--- a/.github/scripts-windows/run-tests-java.cmd
+++ b/.github/scripts-windows/run-tests-java.cmd
@@ -1,3 +1,4 @@
 cd runtime-testsuite
+set MAVEN_OPTS=-Xmx8g
 mvn -Dparallel=classes -DthreadCount=2 -Dtest=java.** test
 cd ..
diff --git a/.github/scripts-windows/run-tests-javascript.cmd b/.github/scripts-windows/run-tests-javascript.cmd
index b8744e8980..81d1eacbde 100644
--- a/.github/scripts-windows/run-tests-javascript.cmd
+++ b/.github/scripts-windows/run-tests-javascript.cmd
@@ -1,3 +1,4 @@
 cd runtime-testsuite
+set MAVEN_OPTS=-Xmx8g
 mvn -Dparallel=classes -DthreadCount=2 -Dtest=javascript.** test
 cd ..
diff --git a/.github/scripts-windows/run-tests-php.cmd b/.github/scripts-windows/run-tests-php.cmd
index 155cc0472f..1dfe69cdb2 100644
--- a/.github/scripts-windows/run-tests-php.cmd
+++ b/.github/scripts-windows/run-tests-php.cmd
@@ -4,5 +4,6 @@ git clone https://github.com/antlr/antlr-php-runtime.git
 move antlr-php-runtime runtime\PHP
 
 cd runtime-testsuite
+set MAVEN_OPTS=-Xmx8g
 mvn -Dparallel=classes -DthreadCount=2 -Dtest=php.** test -Dantlr-php-php="C:\tools\php81\php.exe"
 cd ..
diff --git a/.github/scripts-windows/run-tests-python2.cmd b/.github/scripts-windows/run-tests-python2.cmd
index 351355b03a..6f2defe23c 100644
--- a/.github/scripts-windows/run-tests-python2.cmd
+++ b/.github/scripts-windows/run-tests-python2.cmd
@@ -1,3 +1,4 @@
 cd runtime-testsuite
+set MAVEN_OPTS=-Xmx8g
 mvn -Dparallel=classes -DthreadCount=2 -Dantlr-python2-python="C:\Python27\python.exe" -Dtest=python2.** test
 cd ..
diff --git a/.github/scripts-windows/run-tests-python3.cmd b/.github/scripts-windows/run-tests-python3.cmd
index fe448be018..f08639c70d 100644
--- a/.github/scripts-windows/run-tests-python3.cmd
+++ b/.github/scripts-windows/run-tests-python3.cmd
@@ -1,3 +1,4 @@
 cd runtime-testsuite
+set MAVEN_OPTS=-Xmx8g
 mvn -Dparallel=classes -DthreadCount=2 -Dantlr-python3-python="C:\Python310\python.exe" -Dtest=python3.** test
 cd ..
diff --git a/runtime-testsuite/pom.xml b/runtime-testsuite/pom.xml
index 50d1bed541..bcba9ceae2 100644
--- a/runtime-testsuite/pom.xml
+++ b/runtime-testsuite/pom.xml
@@ -132,6 +132,8 @@
                 <artifactId>maven-compiler-plugin</artifactId>
                 <configuration>
                     <release>8</release>
+                    <source>9</source>
+                    <target>9</target>
                 </configuration>
             </plugin>
         </plugins>
diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/LexerExec/TokenType0xFFFF.txt b/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/LexerExec/TokenType0xFFFF.txt
new file mode 100644
index 0000000000..9018111fe7
--- /dev/null
+++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/LexerExec/TokenType0xFFFF.txt
@@ -0,0 +1,13 @@
+[type]
+Lexer
+
+[grammar]
+lexer grammar L;
+T_FFFF: 'FFFF' -> type(65535);
+
+[input]
+FFFF
+
+[output]
+[@0,0:3='FFFF',<65535>,1:0]
+[@1,4:3='<EOF>',<-1>,1:4]
diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/LexerExec/UnicodeCharSet.txt b/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/LexerExec/UnicodeCharSet.txt
index e9918ad741..f697767d92 100644
--- a/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/LexerExec/UnicodeCharSet.txt
+++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/LexerExec/UnicodeCharSet.txt
@@ -6,7 +6,7 @@ Lexer
 
 [grammar]
 lexer grammar L;
-ID : ([A-Z_]|'Ā'..'￾') ([A-Z_0-9]|'Ā'..'￾')*;
+ID : ([A-Z_]|'Ā'..'\uFFFC') ([A-Z_0-9]|'Ā'..'\uFFFC')*; // FFFD+ are not valid char
 
 [input]
 均
diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTest.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTest.java
index f0546226fd..fdd476a6f8 100644
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTest.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTest.java
@@ -366,9 +366,10 @@ public static RuntimeTestDescriptor[] getRuntimeTestDescriptors(String group, St
 		}
 
 		if (group.equals("LexerExec")) {
-			descriptors.add(GeneratedLexerDescriptors.getLineSeparatorLfTest(targetName));
-			descriptors.add(GeneratedLexerDescriptors.getLineSeparatorCrLfTest(targetName));
+			descriptors.add(GeneratedLexerDescriptors.getLineSeparatorLfDescriptor(targetName));
+			descriptors.add(GeneratedLexerDescriptors.getLineSeparatorCrLfDescriptor(targetName));
 			descriptors.add(GeneratedLexerDescriptors.getLargeLexerDescriptor(targetName));
+			descriptors.add(GeneratedLexerDescriptors.getAtnStatesSizeMoreThan65535Descriptor(targetName));
 		}
 
 		return descriptors.toArray(new RuntimeTestDescriptor[0]);
diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTestSupport.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTestSupport.java
index 5ab6cea706..578e0cf3e1 100644
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTestSupport.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTestSupport.java
@@ -6,6 +6,7 @@
 import org.antlr.v4.runtime.atn.ATN;
 import org.antlr.v4.runtime.atn.ATNDeserializer;
 import org.antlr.v4.runtime.atn.ATNSerializer;
+import org.antlr.v4.runtime.misc.IntegerList;
 import org.antlr.v4.semantics.SemanticPipeline;
 import org.antlr.v4.tool.Grammar;
 import org.antlr.v4.tool.LexerGrammar;
@@ -219,8 +220,9 @@ protected ATN createATN(Grammar g, boolean useSerializer) {
 
 		ATN atn = g.atn;
 		if ( useSerializer ) {
-			char[] serialized = ATNSerializer.getSerializedAsChars(atn, g.getLanguage());
-			return new ATNDeserializer().deserialize(serialized);
+			// sets some flags in ATN
+			IntegerList serialized = ATNSerializer.getSerialized(atn);
+			return new ATNDeserializer().deserialize(serialized.toArray());
 		}
 
 		return atn;
diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/GeneratedLexerDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/GeneratedLexerDescriptors.java
index e0a1bb5ea3..b489e36142 100644
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/GeneratedLexerDescriptors.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/GeneratedLexerDescriptors.java
@@ -1,7 +1,9 @@
 package org.antlr.v4.test.runtime;
 
+import java.util.*;
+
 public class GeneratedLexerDescriptors {
-	static RuntimeTestDescriptor getLineSeparatorLfTest(String targetName) {
+	static RuntimeTestDescriptor getLineSeparatorLfDescriptor(String targetName) {
 		UniversalRuntimeTestDescriptor result = new UniversalRuntimeTestDescriptor();
 		result.name = "LineSeparatorLf";
 		result.targetName = targetName;
@@ -20,7 +22,7 @@ static RuntimeTestDescriptor getLineSeparatorLfTest(String targetName) {
 		return result;
 	}
 
-	static RuntimeTestDescriptor getLineSeparatorCrLfTest(String targetName) {
+	static RuntimeTestDescriptor getLineSeparatorCrLfDescriptor(String targetName) {
 		UniversalRuntimeTestDescriptor result = new UniversalRuntimeTestDescriptor();
 		result.name = "LineSeparatorCrLf";
 		result.targetName = targetName;
@@ -65,4 +67,64 @@ static RuntimeTestDescriptor getLargeLexerDescriptor(String targetName) {
 				"[@1,5:4='<EOF>',<-1>,1:5]\n";
 		return result;
 	}
+
+	static RuntimeTestDescriptor getAtnStatesSizeMoreThan65535Descriptor(String targetName) {
+		UniversalRuntimeTestDescriptor result = new UniversalRuntimeTestDescriptor();
+		result.name = "AtnStatesSizeMoreThan65535";
+		result.notes = "Regression for https://github.com/antlr/antlr4/issues/1863";
+		result.targetName = targetName;
+		result.testType = "Lexer";
+		// Generates a lexer test with tokensCount literal rules T_<6-digit index>, each matching its own name plus a 70-underscore suffix.
+		// Sizing note: I tried playing around with different sizes and I think 1002 works for Go but 1003 does not;
+		// the executing lexer gets a token syntax error for T208 or something like that
+		final int tokensCount = 1024;
+		final String suffix = String.join("", Collections.nCopies(70, "_"));
+
+		String grammarName = "L";
+		StringBuilder grammar = new StringBuilder();
+		grammar.append("lexer grammar ").append(grammarName).append(";\n");
+		grammar.append('\n');
+		StringBuilder input = new StringBuilder();
+		StringBuilder output = new StringBuilder();
+		int startOffset;
+		int stopOffset = -2; // chosen so the first token starts at offset 0 (startOffset = stopOffset + 2)
+		for (int i = 0; i < tokensCount; i++) {
+			String ruleName = String.format("T_%06d", i);
+			String value = ruleName+suffix;
+			grammar.append(ruleName).append(": '").append(value).append("';\n");
+			input.append(value).append('\n');
+
+			startOffset = stopOffset + 2; // step over the previous token's last char and the '\n' after it
+			stopOffset += value.length() + 1; // inclusive index of this token's last char
+
+			output.append("[@").append(i).append(',').append(startOffset).append(':').append(stopOffset)
+					.append("='").append(value).append("',<").append(i + 1).append(">,").append(i + 1)
+					.append(":0]\n");
+		}
+
+		grammar.append("\n");
+		grammar.append("WS: [ \\t\\r\\n]+ -> skip;\n");
+
+		startOffset = stopOffset + 2;
+		stopOffset = startOffset - 1; // the expected EOF token is written as an empty span (stop = start - 1)
+		output.append("[@").append(tokensCount).append(',').append(startOffset).append(':').append(stopOffset)
+				.append("='<EOF>',<-1>,").append(tokensCount + 1).append(":0]\n");
+
+		result.grammar = grammar.toString();
+		result.grammarName = grammarName;
+		result.input = input.toString();
+		result.output = output.toString();
+
+		// We seem to get memory errors during CI, so this is skipped for every target except Cpp and Java
+		List<String> all = Arrays.asList(
+//				"CSharp", "Python2", "Python3", "Cpp", "Go", "PHP", "Swift", "Java", "JavaScript", "Node", "Dart"
+				"CSharp", "Python2", "Python3", "Go", "PHP", "Swift", "JavaScript", "Node", "Dart"
+		);
+		result.skipTargets.addAll(all);
+
+//		result.skipTargets.add("Node"); // doesn't terminate
+//		result.skipTargets.add("PHP"); // "Allowed memory size of 134217728 bytes exhausted (tried to allocate 16384 bytes)..."
+//		result.skipTargets.add("Go"); // syntax error
+		return result;
+	}
 }
diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestInterpreterDataReader.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestInterpreterDataReader.java
index 65af466a0a..d11f6904a3 100644
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestInterpreterDataReader.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestInterpreterDataReader.java
@@ -6,20 +6,18 @@
 import org.antlr.v4.runtime.atn.ATN;
 import org.antlr.v4.runtime.atn.ATNDeserializer;
 import org.antlr.v4.runtime.atn.ATNSerializer;
+import org.antlr.v4.runtime.misc.IntegerList;
 import org.antlr.v4.runtime.misc.InterpreterDataReader;
-import org.antlr.v4.runtime.misc.Utils;
 import org.antlr.v4.tool.Grammar;
 import org.junit.Assert;
 import org.junit.Test;
 
 import java.io.IOException;
 import java.lang.reflect.Field;
-import java.net.URL;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.List;
 
 /** This file represents a simple sanity checks on the parsing of the .interp file
@@ -76,8 +74,8 @@ public void testParseFile() throws IOException, NoSuchFieldException, IllegalAcc
 		Assert.assertNull(channels);
 		Assert.assertNull(modes);
 
-        char[] atnChars = ATNSerializer.getSerializedAsChars(atn, g.getLanguage());
-		Assert.assertEquals(ATNDeserializer.SERIALIZED_VERSION, atnChars[0]);
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		Assert.assertEquals(ATNDeserializer.SERIALIZED_VERSION, serialized.get(0));
     }
 
     private <T> List<T> castList(Object obj, Class<T> clazz) {
diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/perf/TimeLexerSpeed.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/perf/TimeLexerSpeed.java
index e129fb145a..e653f8b59f 100644
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/perf/TimeLexerSpeed.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/perf/TimeLexerSpeed.java
@@ -55,7 +55,7 @@
  *
  *  Sample output on OS X with 4 GHz Intel Core i7 (us == microseconds, 1/1000 of a millisecond):
  *
- Java VM args: -Xms2G -Xmx2G
+ Java VM args: -Xms2G -Xmx8g
  Warming up Java compiler....
  load_legacy_java_ascii_file average time    53us size  58384b over 3500 loads of 29038 symbols from Parser.java
  load_legacy_java_ascii_file average time    27us size  15568b over 3500 loads of  7625 symbols from RuleContext.java
diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/php/BasePHPTest.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/php/BasePHPTest.java
index d906ff0cd0..ec6d4fda8e 100644
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/php/BasePHPTest.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/php/BasePHPTest.java
@@ -244,7 +244,7 @@ private String locateTool(String tool) {
 			return phpPath;
 		}
 
-		String[] roots = {"/usr/local/bin/", "/opt/local/bin", "/usr/bin/"};
+		String[] roots = {"/usr/local/bin/", "/opt/local/bin/", "/opt/homebrew/bin/", "/usr/bin/"};
 
 		for (String root: roots) {
 			if (new File(root + tool).exists()) {
diff --git a/runtime/CSharp/src/Atn/ATNDeserializer.cs b/runtime/CSharp/src/Atn/ATNDeserializer.cs
index 9cefab68de..7f47677062 100644
--- a/runtime/CSharp/src/Atn/ATNDeserializer.cs
+++ b/runtime/CSharp/src/Atn/ATNDeserializer.cs
@@ -45,8 +45,7 @@ public virtual ATN Deserialize(int[] data)
 			ReadRules (atn);
 			ReadModes (atn);
 			IList<IntervalSet> sets = new List<IntervalSet>();
-			ReadSets (atn, sets, ReadInt);
-	        ReadSets (atn, sets, ReadInt32);
+			ReadSets (atn, sets);
 	        ReadEdges (atn, sets);
 			ReadDecisions (atn);
 			ReadLexerActions (atn);
@@ -190,15 +189,7 @@ protected internal virtual void ReadLexerActions(ATN atn)
 				{
 					LexerActionType actionType = (LexerActionType)ReadInt();
 					int data1 = ReadInt();
-					if (data1 == unchecked((int)(0xFFFF)))
-					{
-						data1 = -1;
-					}
 					int data2 = ReadInt();
-					if (data2 == unchecked((int)(0xFFFF)))
-					{
-						data2 = -1;
-					}
 					ILexerAction lexerAction = LexerActionFactory(actionType, data1, data2);
 					atn.lexerActions[i_10] = lexerAction;
 				}
@@ -309,7 +300,7 @@ protected internal virtual void ReadEdges(ATN atn, IList<IntervalSet> sets)
 			}
 		}
 
-		protected internal virtual void ReadSets(ATN atn, IList<IntervalSet> sets, System.Func<int> readUnicode)
+		protected internal virtual void ReadSets(ATN atn, IList<IntervalSet> sets)
 		{
 			//
 			// SETS
@@ -327,7 +318,7 @@ protected internal virtual void ReadSets(ATN atn, IList<IntervalSet> sets, Syste
 				}
 				for (int j = 0; j < nintervals; j++)
 				{
-					set.Add(readUnicode(), readUnicode());
+					set.Add(ReadInt(), ReadInt());
 				}
 			}
 		}
@@ -369,9 +360,6 @@ protected internal virtual void ReadRules(ATN atn)
 				atn.ruleToStartState[i_5] = startState;
 				if (atn.grammarType == ATNType.Lexer) {
 					int tokenType = ReadInt ();
-					if (tokenType == unchecked((int)(0xFFFF))) {
-						tokenType = TokenConstants.EOF;
-					}
 					atn.ruleToTokenType [i_5] = tokenType;
 				}
 			}
@@ -967,11 +955,6 @@ protected internal int ReadInt()
 			return data[p++];
         }
 
-        protected internal int ReadInt32()
-        {
-			return (int)data[p++] | ((int)data[p++] << 16);
-        }
-
         [return: NotNull]
         protected internal virtual Transition EdgeFactory(ATN atn, TransitionType type, int src, int trg, int arg1, int arg2, int arg3, IList<IntervalSet> sets)
         {
diff --git a/runtime/Cpp/runtime/antlr4cpp-vs2013.vcxproj b/runtime/Cpp/runtime/antlr4cpp-vs2013.vcxproj
index 15e3a3c975..83f76113ef 100644
--- a/runtime/Cpp/runtime/antlr4cpp-vs2013.vcxproj
+++ b/runtime/Cpp/runtime/antlr4cpp-vs2013.vcxproj
@@ -493,8 +493,6 @@
     <ClInclude Include="src\atn\ATNConfigSet.h" />
     <ClInclude Include="src\atn\ATNDeserializationOptions.h" />
     <ClInclude Include="src\atn\ATNDeserializer.h" />
-    <ClInclude Include="src\atn\ATNSerializer.h" />
-    <ClInclude Include="src\atn\ATNSimulator.h" />
     <ClInclude Include="src\atn\ATNState.h" />
     <ClInclude Include="src\atn\ATNType.h" />
     <ClInclude Include="src\atn\AtomTransition.h" />
diff --git a/runtime/Cpp/runtime/antlr4cpp-vs2013.vcxproj.filters b/runtime/Cpp/runtime/antlr4cpp-vs2013.vcxproj.filters
index 499a82ed4d..0105b80e74 100644
--- a/runtime/Cpp/runtime/antlr4cpp-vs2013.vcxproj.filters
+++ b/runtime/Cpp/runtime/antlr4cpp-vs2013.vcxproj.filters
@@ -285,9 +285,6 @@
     <ClInclude Include="src\atn\ATNDeserializer.h">
       <Filter>Header Files\atn</Filter>
     </ClInclude>
-    <ClInclude Include="src\atn\ATNSerializer.h">
-      <Filter>Header Files\atn</Filter>
-    </ClInclude>
     <ClInclude Include="src\atn\ATNSimulator.h">
       <Filter>Header Files\atn</Filter>
     </ClInclude>
diff --git a/runtime/Cpp/runtime/antlr4cpp-vs2015.vcxproj b/runtime/Cpp/runtime/antlr4cpp-vs2015.vcxproj
index a90095d30e..8fb5cf9806 100644
--- a/runtime/Cpp/runtime/antlr4cpp-vs2015.vcxproj
+++ b/runtime/Cpp/runtime/antlr4cpp-vs2015.vcxproj
@@ -507,7 +507,6 @@
     <ClInclude Include="src\atn\ATNConfigSet.h" />
     <ClInclude Include="src\atn\ATNDeserializationOptions.h" />
     <ClInclude Include="src\atn\ATNDeserializer.h" />
-    <ClInclude Include="src\atn\ATNSerializer.h" />
     <ClInclude Include="src\atn\ATNSimulator.h" />
     <ClInclude Include="src\atn\ATNState.h" />
     <ClInclude Include="src\atn\ATNType.h" />
diff --git a/runtime/Cpp/runtime/antlr4cpp-vs2015.vcxproj.filters b/runtime/Cpp/runtime/antlr4cpp-vs2015.vcxproj.filters
index cc1986923d..8573ee8373 100644
--- a/runtime/Cpp/runtime/antlr4cpp-vs2015.vcxproj.filters
+++ b/runtime/Cpp/runtime/antlr4cpp-vs2015.vcxproj.filters
@@ -285,9 +285,6 @@
     <ClInclude Include="src\atn\ATNDeserializer.h">
       <Filter>Header Files\atn</Filter>
     </ClInclude>
-    <ClInclude Include="src\atn\ATNSerializer.h">
-      <Filter>Header Files\atn</Filter>
-    </ClInclude>
     <ClInclude Include="src\atn\ATNSimulator.h">
       <Filter>Header Files\atn</Filter>
     </ClInclude>
diff --git a/runtime/Cpp/runtime/antlr4cpp-vs2017.vcxproj b/runtime/Cpp/runtime/antlr4cpp-vs2017.vcxproj
index eb96aa8dfb..8ad1d01b6f 100644
--- a/runtime/Cpp/runtime/antlr4cpp-vs2017.vcxproj
+++ b/runtime/Cpp/runtime/antlr4cpp-vs2017.vcxproj
@@ -507,7 +507,6 @@
     <ClInclude Include="src\atn\ATNConfigSet.h" />
     <ClInclude Include="src\atn\ATNDeserializationOptions.h" />
     <ClInclude Include="src\atn\ATNDeserializer.h" />
-    <ClInclude Include="src\atn\ATNSerializer.h" />
     <ClInclude Include="src\atn\ATNSimulator.h" />
     <ClInclude Include="src\atn\ATNState.h" />
     <ClInclude Include="src\atn\ATNType.h" />
diff --git a/runtime/Cpp/runtime/antlr4cpp-vs2017.vcxproj.filters b/runtime/Cpp/runtime/antlr4cpp-vs2017.vcxproj.filters
index cc1986923d..8573ee8373 100644
--- a/runtime/Cpp/runtime/antlr4cpp-vs2017.vcxproj.filters
+++ b/runtime/Cpp/runtime/antlr4cpp-vs2017.vcxproj.filters
@@ -285,9 +285,6 @@
     <ClInclude Include="src\atn\ATNDeserializer.h">
       <Filter>Header Files\atn</Filter>
     </ClInclude>
-    <ClInclude Include="src\atn\ATNSerializer.h">
-      <Filter>Header Files\atn</Filter>
-    </ClInclude>
     <ClInclude Include="src\atn\ATNSimulator.h">
       <Filter>Header Files\atn</Filter>
     </ClInclude>
diff --git a/runtime/Cpp/runtime/antlr4cpp-vs2019.vcxproj b/runtime/Cpp/runtime/antlr4cpp-vs2019.vcxproj
index d07ad0b0e0..d5df910b8a 100644
--- a/runtime/Cpp/runtime/antlr4cpp-vs2019.vcxproj
+++ b/runtime/Cpp/runtime/antlr4cpp-vs2019.vcxproj
@@ -515,7 +515,6 @@
     <ClInclude Include="src\atn\ATNConfigSet.h" />
     <ClInclude Include="src\atn\ATNDeserializationOptions.h" />
     <ClInclude Include="src\atn\ATNDeserializer.h" />
-    <ClInclude Include="src\atn\ATNSerializer.h" />
     <ClInclude Include="src\atn\ATNSimulator.h" />
     <ClInclude Include="src\atn\ATNState.h" />
     <ClInclude Include="src\atn\ATNType.h" />
diff --git a/runtime/Cpp/runtime/antlr4cpp-vs2019.vcxproj.filters b/runtime/Cpp/runtime/antlr4cpp-vs2019.vcxproj.filters
index cc1986923d..8573ee8373 100644
--- a/runtime/Cpp/runtime/antlr4cpp-vs2019.vcxproj.filters
+++ b/runtime/Cpp/runtime/antlr4cpp-vs2019.vcxproj.filters
@@ -285,9 +285,6 @@
     <ClInclude Include="src\atn\ATNDeserializer.h">
       <Filter>Header Files\atn</Filter>
     </ClInclude>
-    <ClInclude Include="src\atn\ATNSerializer.h">
-      <Filter>Header Files\atn</Filter>
-    </ClInclude>
     <ClInclude Include="src\atn\ATNSimulator.h">
       <Filter>Header Files\atn</Filter>
     </ClInclude>
diff --git a/runtime/Cpp/runtime/src/Parser.cpp b/runtime/Cpp/runtime/src/Parser.cpp
index b07ff4efa9..bcd36b44fb 100755
--- a/runtime/Cpp/runtime/src/Parser.cpp
+++ b/runtime/Cpp/runtime/src/Parser.cpp
@@ -38,7 +38,7 @@ struct BypassAltsAtnCache final {
   /// bypass alternatives.
   ///
   /// <seealso cref= ATNDeserializationOptions#isGenerateRuleBypassTransitions() </seealso>
-  std::map<std::vector<uint16_t>, std::unique_ptr<const atn::ATN>> map;
+  std::map<std::vector<int32_t>, std::unique_ptr<const atn::ATN>> map;
 };
 
 BypassAltsAtnCache* getBypassAltsAtnCache() {
@@ -229,7 +229,7 @@ TokenFactory<CommonToken>* Parser::getTokenFactory() {
 
 
 const atn::ATN& Parser::getATNWithBypassAlts() {
-  const std::vector<uint16_t> &serializedAtn = getSerializedATN();
+  const std::vector<int32_t> &serializedAtn = getSerializedATN();
   if (serializedAtn.empty()) {
     throw UnsupportedOperationException("The current parser does not support an ATN with bypass alternatives.");
   }
diff --git a/runtime/Cpp/runtime/src/Recognizer.h b/runtime/Cpp/runtime/src/Recognizer.h
index 932e726675..28abfc8741 100755
--- a/runtime/Cpp/runtime/src/Recognizer.h
+++ b/runtime/Cpp/runtime/src/Recognizer.h
@@ -53,7 +53,7 @@ namespace antlr4 {
     /// For interpreters, we don't know their serialized ATN despite having
     /// created the interpreter from it.
     /// </summary>
-    virtual const std::vector<uint16_t>& getSerializedATN() const {
+    virtual const std::vector<int32_t>& getSerializedATN() const {
       throw "there is no serialized ATN";
     }
 
diff --git a/runtime/Cpp/runtime/src/antlr4-runtime.h b/runtime/Cpp/runtime/src/antlr4-runtime.h
index 70de934f50..193ea68964 100644
--- a/runtime/Cpp/runtime/src/antlr4-runtime.h
+++ b/runtime/Cpp/runtime/src/antlr4-runtime.h
@@ -57,7 +57,6 @@
 #include "atn/ATNConfigSet.h"
 #include "atn/ATNDeserializationOptions.h"
 #include "atn/ATNDeserializer.h"
-#include "atn/ATNSerializer.h"
 #include "atn/ATNSimulator.h"
 #include "atn/ATNState.h"
 #include "atn/ATNType.h"
diff --git a/runtime/Cpp/runtime/src/atn/ATNDeserializer.cpp b/runtime/Cpp/runtime/src/atn/ATNDeserializer.cpp
index 285167247b..3b5563c975 100755
--- a/runtime/Cpp/runtime/src/atn/ATNDeserializer.cpp
+++ b/runtime/Cpp/runtime/src/atn/ATNDeserializer.cpp
@@ -221,28 +221,14 @@ namespace {
     return s;
   }
 
-  uint32_t deserializeInt32(const std::vector<uint16_t>& data, size_t offset) {
-    return static_cast<uint32_t>(data[offset]) | (static_cast<uint32_t>(data[offset + 1]) << 16);
-  }
-
-  ssize_t readUnicodeInt(const std::vector<uint16_t>& data, int& p) {
+  ssize_t readUnicodeInt32(const std::vector<int32_t>& data, int& p) {
     return static_cast<ssize_t>(data[p++]);
   }
 
-  ssize_t readUnicodeInt32(const std::vector<uint16_t>& data, int& p) {
-    auto result = deserializeInt32(data, p);
-    p += 2;
-    return static_cast<ssize_t>(result);
-  }
-
-  // We templatize this on the function type so the optimizer can inline
-  // the 16- or 32-bit readUnicodeInt/readUnicodeInt32 as needed.
-  template <typename F>
   void deserializeSets(
-    const std::vector<uint16_t>& data,
+    const std::vector<int32_t>& data,
     int& p,
-    std::vector<misc::IntervalSet>& sets,
-    F readUnicode) {
+    std::vector<misc::IntervalSet>& sets) {
     size_t nsets = data[p++];
     sets.reserve(sets.size() + nsets);
     for (size_t i = 0; i < nsets; i++) {
@@ -255,8 +241,8 @@ namespace {
       }
 
       for (size_t j = 0; j < nintervals; j++) {
-        auto a = readUnicode(data, p);
-        auto b = readUnicode(data, p);
+        auto a = readUnicodeInt32(data, p);
+        auto b = readUnicodeInt32(data, p);
         set.add(a, b);
       }
       sets.push_back(set);
@@ -269,7 +255,7 @@ ATNDeserializer::ATNDeserializer() : ATNDeserializer(ATNDeserializationOptions::
 
 ATNDeserializer::ATNDeserializer(ATNDeserializationOptions deserializationOptions) : _deserializationOptions(std::move(deserializationOptions)) {}
 
-std::unique_ptr<ATN> ATNDeserializer::deserialize(const std::vector<uint16_t>& data) const {
+std::unique_ptr<ATN> ATNDeserializer::deserialize(const std::vector<int32_t>& data) const {
   int p = 0;
   int version = data[p++];
   if (version != SERIALIZED_VERSION) {
@@ -301,10 +287,6 @@ std::unique_ptr<ATN> ATNDeserializer::deserialize(const std::vector<uint16_t>& d
       }
 
       size_t ruleIndex = data[p++];
-      if (ruleIndex == 0xFFFF) {
-        ruleIndex = INVALID_INDEX;
-      }
-
       ATNState *s = stateFactory(stype, ruleIndex);
       if (stype == ATNStateType::LOOP_END) { // special case
         int loopBackStateNumber = data[p++];
@@ -352,10 +334,6 @@ std::unique_ptr<ATN> ATNDeserializer::deserialize(const std::vector<uint16_t>& d
     atn->ruleToStartState.push_back(startState);
     if (atn->grammarType == ATNType::LEXER) {
       size_t tokenType = data[p++];
-      if (tokenType == 0xFFFF) {
-        tokenType = Token::EOF;
-      }
-
       atn->ruleToTokenType.push_back(tokenType);
     }
   }
@@ -387,12 +365,7 @@ std::unique_ptr<ATN> ATNDeserializer::deserialize(const std::vector<uint16_t>& d
   {
     std::vector<misc::IntervalSet> sets;
 
-    // First, deserialize sets with 16-bit arguments <= U+FFFF.
-    deserializeSets(data, p, sets, readUnicodeInt);
-
-    // Next, deserialize sets with 32-bit arguments <= U+10FFFF.
-    deserializeSets(data, p, sets, readUnicodeInt32);
-
+    deserializeSets(data, p, sets);
     sets.shrink_to_fit();
 
     //
@@ -492,15 +465,7 @@ std::unique_ptr<ATN> ATNDeserializer::deserialize(const std::vector<uint16_t>& d
     for (size_t i = 0; i < atn->lexerActions.size(); i++) {
       LexerActionType actionType = static_cast<LexerActionType>(data[p++]);
       int data1 = data[p++];
-      if (data1 == 0xFFFF) {
-        data1 = -1;
-      }
-
       int data2 = data[p++];
-      if (data2 == 0xFFFF) {
-        data2 = -1;
-      }
-
       atn->lexerActions[i] = lexerActionFactory(actionType, data1, data2);
     }
   }
diff --git a/runtime/Cpp/runtime/src/atn/ATNDeserializer.h b/runtime/Cpp/runtime/src/atn/ATNDeserializer.h
index 35276ed782..2442d4b7bd 100755
--- a/runtime/Cpp/runtime/src/atn/ATNDeserializer.h
+++ b/runtime/Cpp/runtime/src/atn/ATNDeserializer.h
@@ -20,7 +20,7 @@ namespace atn {
 
     explicit ATNDeserializer(ATNDeserializationOptions deserializationOptions);
 
-    std::unique_ptr<ATN> deserialize(const std::vector<uint16_t> &input) const;
+    std::unique_ptr<ATN> deserialize(const std::vector<int32_t> &input) const;
     void verifyATN(const ATN &atn) const;
 
   private:
diff --git a/runtime/Cpp/runtime/src/atn/ATNSerializer.cpp b/runtime/Cpp/runtime/src/atn/ATNSerializer.cpp
deleted file mode 100755
index dc62787b9c..0000000000
--- a/runtime/Cpp/runtime/src/atn/ATNSerializer.cpp
+++ /dev/null
@@ -1,589 +0,0 @@
-﻿/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
- * Use of this file is governed by the BSD 3-clause license that
- * can be found in the LICENSE.txt file in the project root.
- */
-
-#include "misc/IntervalSet.h"
-#include "atn/ATNType.h"
-#include "atn/ATNState.h"
-#include "atn/BlockEndState.h"
-
-#include "atn/DecisionState.h"
-#include "atn/RuleStartState.h"
-#include "atn/LoopEndState.h"
-#include "atn/BlockStartState.h"
-#include "atn/Transition.h"
-#include "atn/SetTransition.h"
-#include "Token.h"
-#include "misc/Interval.h"
-#include "atn/ATN.h"
-
-#include "atn/RuleTransition.h"
-#include "atn/PrecedencePredicateTransition.h"
-#include "atn/PredicateTransition.h"
-#include "atn/RangeTransition.h"
-#include "atn/AtomTransition.h"
-#include "atn/ActionTransition.h"
-#include "atn/TransitionType.h"
-#include "atn/ATNDeserializer.h"
-
-#include "atn/TokensStartState.h"
-#include "Exceptions.h"
-#include "support/CPPUtils.h"
-
-#include "atn/LexerChannelAction.h"
-#include "atn/LexerCustomAction.h"
-#include "atn/LexerModeAction.h"
-#include "atn/LexerPushModeAction.h"
-#include "atn/LexerTypeAction.h"
-
-#include "Exceptions.h"
-
-#include "atn/ATNSerializer.h"
-
-using namespace antlrcpp;
-using namespace antlr4::atn;
-
-ATNSerializer::ATNSerializer(ATN *atn) { this->atn = atn; }
-
-ATNSerializer::ATNSerializer(ATN *atn, const std::vector<std::string> &tokenNames) {
-  this->atn = atn;
-  _tokenNames = tokenNames;
-}
-
-ATNSerializer::~ATNSerializer() { }
-
-std::vector<size_t> ATNSerializer::serialize() {
-  std::vector<size_t> data;
-  data.push_back(ATNDeserializer::SERIALIZED_VERSION);
-
-  // convert grammar type to ATN const to avoid dependence on ANTLRParser
-  data.push_back(static_cast<size_t>(atn->grammarType));
-  data.push_back(atn->maxTokenType);
-  size_t nedges = 0;
-
-  std::unordered_map<misc::IntervalSet, int> setIndices;
-  std::vector<misc::IntervalSet> sets;
-
-  // dump states, count edges and collect sets while doing so
-  std::vector<size_t> nonGreedyStates;
-  std::vector<size_t> precedenceStates;
-  data.push_back(atn->states.size());
-  for (ATNState *s : atn->states) {
-    if (s == nullptr) {  // might be optimized away
-      data.push_back(0);
-      continue;
-    }
-
-    size_t stateType = static_cast<size_t>(s->getStateType());
-    if (is<DecisionState *>(s) && (static_cast<DecisionState *>(s))->nonGreedy) {
-      nonGreedyStates.push_back(s->stateNumber);
-    }
-
-    if (is<RuleStartState *>(s) && (static_cast<RuleStartState *>(s))->isLeftRecursiveRule) {
-      precedenceStates.push_back(s->stateNumber);
-    }
-
-    data.push_back(stateType);
-
-    if (s->ruleIndex == INVALID_INDEX) {
-      data.push_back(0xFFFF);
-    }
-    else {
-      data.push_back(s->ruleIndex);
-    }
-
-    if (s->getStateType() == ATNStateType::LOOP_END) {
-      data.push_back((static_cast<LoopEndState *>(s))->loopBackState->stateNumber);
-    }
-    else if (is<BlockStartState *>(s)) {
-      data.push_back((static_cast<BlockStartState *>(s))->endState->stateNumber);
-    }
-
-    if (s->getStateType() != ATNStateType::RULE_STOP) {
-      // the deserializer can trivially derive these edges, so there's no need
-      // to serialize them
-      nedges += s->transitions.size();
-    }
-
-    for (size_t i = 0; i < s->transitions.size(); i++) {
-      const Transition *t = s->transitions[i].get();
-      TransitionType edgeType = t->getTransitionType();
-      if (edgeType == TransitionType::SET || edgeType == TransitionType::NOT_SET) {
-        const SetTransition *st = static_cast<const SetTransition *>(t);
-        if (setIndices.find(st->set) == setIndices.end()) {
-          sets.push_back(st->set);
-          setIndices.insert({ st->set, (int)sets.size() - 1 });
-        }
-      }
-    }
-  }
-
-  // non-greedy states
-  data.push_back(nonGreedyStates.size());
-  for (size_t i = 0; i < nonGreedyStates.size(); i++) {
-    data.push_back(nonGreedyStates.at(i));
-  }
-
-  // precedence states
-  data.push_back(precedenceStates.size());
-  for (size_t i = 0; i < precedenceStates.size(); i++) {
-    data.push_back(precedenceStates.at(i));
-  }
-
-  size_t nrules = atn->ruleToStartState.size();
-  data.push_back(nrules);
-  for (size_t r = 0; r < nrules; r++) {
-    ATNState *ruleStartState = atn->ruleToStartState[r];
-    data.push_back(ruleStartState->stateNumber);
-    if (atn->grammarType == ATNType::LEXER) {
-      if (atn->ruleToTokenType[r] == Token::EOF) {
-        data.push_back(0xFFFF);
-      }
-      else {
-        data.push_back(atn->ruleToTokenType[r]);
-      }
-    }
-  }
-
-  size_t nmodes = atn->modeToStartState.size();
-  data.push_back(nmodes);
-  if (nmodes > 0) {
-    for (const auto &modeStartState : atn->modeToStartState) {
-      data.push_back(modeStartState->stateNumber);
-    }
-  }
-
-  size_t nsets = sets.size();
-  data.push_back(nsets);
-  for (const auto &set : sets) {
-    bool containsEof = set.contains(Token::EOF);
-    if (containsEof && set.getIntervals().at(0).b == -1) {
-      data.push_back(set.getIntervals().size() - 1);
-    }
-    else {
-      data.push_back(set.getIntervals().size());
-    }
-
-    data.push_back(containsEof ? 1 : 0);
-    for (const auto &interval : set.getIntervals()) {
-      if (interval.a == -1) {
-        if (interval.b == -1) {
-          continue;
-        } else {
-          data.push_back(0);
-        }
-      }
-      else {
-        data.push_back(interval.a);
-      }
-
-      data.push_back(interval.b);
-    }
-  }
-
-  data.push_back(nedges);
-  for (ATNState *s : atn->states) {
-    if (s == nullptr) {
-      // might be optimized away
-      continue;
-    }
-
-    if (s->getStateType() == ATNStateType::RULE_STOP) {
-      continue;
-    }
-
-    for (size_t i = 0; i < s->transitions.size(); i++) {
-      const Transition *t = s->transitions[i].get();
-
-      if (atn->states[t->target->stateNumber] == nullptr) {
-        throw IllegalStateException("Cannot serialize a transition to a removed state.");
-      }
-
-      size_t src = s->stateNumber;
-      size_t trg = t->target->stateNumber;
-      TransitionType edgeType = t->getTransitionType();
-      size_t arg1 = 0;
-      size_t arg2 = 0;
-      size_t arg3 = 0;
-      switch (edgeType) {
-        case TransitionType::RULE:
-          trg = (static_cast<const RuleTransition *>(t))->followState->stateNumber;
-          arg1 = (static_cast<const RuleTransition *>(t))->target->stateNumber;
-          arg2 = (static_cast<const RuleTransition *>(t))->ruleIndex;
-          arg3 = (static_cast<const RuleTransition *>(t))->precedence;
-          break;
-        case TransitionType::PRECEDENCE:
-        {
-          const PrecedencePredicateTransition *ppt =
-          static_cast<const PrecedencePredicateTransition *>(t);
-          arg1 = ppt->precedence;
-        }
-          break;
-        case TransitionType::PREDICATE:
-        {
-          const PredicateTransition *pt = static_cast<const PredicateTransition *>(t);
-          arg1 = pt->ruleIndex;
-          arg2 = pt->predIndex;
-          arg3 = pt->isCtxDependent ? 1 : 0;
-        }
-          break;
-        case TransitionType::RANGE:
-          arg1 = (static_cast<const RangeTransition *>(t))->from;
-          arg2 = (static_cast<const RangeTransition *>(t))->to;
-          if (arg1 == Token::EOF) {
-            arg1 = 0;
-            arg3 = 1;
-          }
-
-          break;
-        case TransitionType::ATOM:
-          arg1 = (static_cast<const AtomTransition *>(t))->_label;
-          if (arg1 == Token::EOF) {
-            arg1 = 0;
-            arg3 = 1;
-          }
-
-          break;
-        case TransitionType::ACTION:
-        {
-          const ActionTransition *at = static_cast<const ActionTransition *>(t);
-          arg1 = at->ruleIndex;
-          arg2 = at->actionIndex;
-          if (arg2 == INVALID_INDEX) {
-            arg2 = 0xFFFF;
-          }
-
-          arg3 = at->isCtxDependent ? 1 : 0;
-        }
-          break;
-        case TransitionType::SET:
-          arg1 = setIndices[(static_cast<const SetTransition *>(t))->set];
-          break;
-
-        case TransitionType::NOT_SET:
-          arg1 = setIndices[(static_cast<const SetTransition *>(t))->set];
-          break;
-
-        default:
-          break;
-      }
-
-      data.push_back(src);
-      data.push_back(trg);
-      data.push_back(static_cast<size_t>(edgeType));
-      data.push_back(arg1);
-      data.push_back(arg2);
-      data.push_back(arg3);
-    }
-  }
-
-  size_t ndecisions = atn->decisionToState.size();
-  data.push_back(ndecisions);
-  for (DecisionState *decStartState : atn->decisionToState) {
-    data.push_back(decStartState->stateNumber);
-  }
-
-  // LEXER ACTIONS
-  if (atn->grammarType == ATNType::LEXER) {
-    data.push_back(atn->lexerActions.size());
-    for (const auto &action : atn->lexerActions) {
-      data.push_back(static_cast<size_t>(action->getActionType()));
-      switch (action->getActionType()) {
-        case LexerActionType::CHANNEL:
-        {
-          int channel = std::dynamic_pointer_cast<LexerChannelAction>(action)->getChannel();
-          data.push_back(channel != -1 ? channel : 0xFFFF);
-          data.push_back(0);
-          break;
-        }
-
-        case LexerActionType::CUSTOM:
-        {
-          size_t ruleIndex = std::dynamic_pointer_cast<LexerCustomAction>(action)->getRuleIndex();
-          size_t actionIndex = std::dynamic_pointer_cast<LexerCustomAction>(action)->getActionIndex();
-          data.push_back(ruleIndex != INVALID_INDEX ? ruleIndex : 0xFFFF);
-          data.push_back(actionIndex != INVALID_INDEX ? actionIndex : 0xFFFF);
-          break;
-        }
-
-        case LexerActionType::MODE:
-        {
-          int mode = std::dynamic_pointer_cast<LexerModeAction>(action)->getMode();
-          data.push_back(mode != -1 ? mode : 0xFFFF);
-          data.push_back(0);
-          break;
-        }
-
-        case LexerActionType::MORE:
-          data.push_back(0);
-          data.push_back(0);
-          break;
-
-        case LexerActionType::POP_MODE:
-          data.push_back(0);
-          data.push_back(0);
-          break;
-
-        case LexerActionType::PUSH_MODE:
-        {
-          int mode = std::dynamic_pointer_cast<LexerPushModeAction>(action)->getMode();
-          data.push_back(mode != -1 ? mode : 0xFFFF);
-          data.push_back(0);
-          break;
-        }
-
-        case LexerActionType::SKIP:
-          data.push_back(0);
-          data.push_back(0);
-          break;
-
-        case LexerActionType::TYPE:
-        {
-          int type = std::dynamic_pointer_cast<LexerTypeAction>(action)->getType();
-          data.push_back(type != -1 ? type : 0xFFFF);
-          data.push_back(0);
-          break;
-        }
-
-        default:
-          throw IllegalArgumentException("The specified lexer action type " +
-                                         std::to_string(static_cast<size_t>(action->getActionType())) +
-                                         " is not valid.");
-      }
-    }
-  }
-
-  for (size_t i = 0; i < data.size(); i++) {
-    if (data.at(i) > 0xFFFF) {
-      throw UnsupportedOperationException("Serialized ATN data element out of range.");
-    }
-  }
-
-  return data;
-}
-
-//------------------------------------------------------------------------------------------------------------
-
-std::string ATNSerializer::decode(const std::wstring &inpdata) {
-  if (inpdata.size() < 10)
-    throw IllegalArgumentException("Not enough data to decode");
-
-  std::vector<uint16_t> data(inpdata.size());
-
-  for (size_t i = 0; i < inpdata.size(); ++i) {
-    data[i] = (uint16_t)inpdata[i];
-  }
-
-  std::string buf;
-  size_t p = 0;
-  size_t version = data[p++];
-  if (version != ATNDeserializer::SERIALIZED_VERSION) {
-    std::string reason = "Could not deserialize ATN with version " + std::to_string(version) + "(expected " +
-    std::to_string(ATNDeserializer::SERIALIZED_VERSION) + ").";
-    throw UnsupportedOperationException("ATN Serializer" + reason);
-  }
-
-  p++;  // skip grammarType
-  size_t maxType = data[p++];
-  buf.append("max type ").append(std::to_string(maxType)).append("\n");
-  size_t nstates = data[p++];
-  for (size_t i = 0; i < nstates; i++) {
-    ATNStateType stype = static_cast<ATNStateType>(data[p++]);
-    if (stype == ATNStateType::INVALID) {  // ignore bad type of states
-      continue;
-    }
-    size_t ruleIndex = data[p++];
-    if (ruleIndex == 0xFFFF) {
-      ruleIndex = INVALID_INDEX;
-    }
-
-    std::string arg = "";
-    if (stype == ATNStateType::LOOP_END) {
-      int loopBackStateNumber = data[p++];
-      arg = std::string(" ") + std::to_string(loopBackStateNumber);
-    }
-    else if (stype == ATNStateType::PLUS_BLOCK_START ||
-             stype == ATNStateType::STAR_BLOCK_START ||
-             stype == ATNStateType::BLOCK_START) {
-      int endStateNumber = data[p++];
-      arg = std::string(" ") + std::to_string(endStateNumber);
-    }
-    buf.append(std::to_string(i))
-    .append(":")
-    .append(atnStateTypeName(stype))
-    .append(" ")
-    .append(std::to_string(ruleIndex))
-    .append(arg)
-    .append("\n");
-  }
-  size_t numNonGreedyStates = data[p++];
-  p += numNonGreedyStates; // Instead of that useless loop below.
-  /*
-   for (int i = 0; i < numNonGreedyStates; i++) {
-   int stateNumber = data[p++];
-   }
-   */
-
-  size_t numPrecedenceStates = data[p++];
-  p += numPrecedenceStates;
-  /*
-   for (int i = 0; i < numPrecedenceStates; i++) {
-   int stateNumber = data[p++];
-   }
-   */
-
-  size_t nrules = data[p++];
-  for (size_t i = 0; i < nrules; i++) {
-    size_t s = data[p++];
-    if (atn->grammarType == ATNType::LEXER) {
-      size_t arg1 = data[p++];
-      buf.append("rule ")
-      .append(std::to_string(i))
-      .append(":")
-      .append(std::to_string(s))
-      .append(" ")
-      .append(std::to_string(arg1))
-      .append("\n");
-    }
-    else {
-      buf.append("rule ")
-      .append(std::to_string(i))
-      .append(":")
-      .append(std::to_string(s))
-      .append("\n");
-    }
-  }
-  size_t nmodes = data[p++];
-  for (size_t i = 0; i < nmodes; i++) {
-    size_t s = data[p++];
-    buf.append("mode ")
-    .append(std::to_string(i))
-    .append(":")
-    .append(std::to_string(s))
-    .append("\n");
-  }
-  size_t nsets = data[p++];
-  for (size_t i = 0; i < nsets; i++) {
-    size_t nintervals = data[p++];
-    buf.append(std::to_string(i)).append(":");
-    bool containsEof = data[p++] != 0;
-    if (containsEof) {
-      buf.append(getTokenName(Token::EOF));
-    }
-
-    for (size_t j = 0; j < nintervals; j++) {
-      if (containsEof || j > 0) {
-        buf.append(", ");
-      }
-
-      buf.append(getTokenName(data[p]))
-      .append("..")
-      .append(getTokenName(data[p + 1]));
-      p += 2;
-    }
-    buf.append("\n");
-  }
-  size_t nedges = data[p++];
-  for (size_t i = 0; i < nedges; i++) {
-    size_t src = data[p];
-    size_t trg = data[p + 1];
-    TransitionType ttype = static_cast<TransitionType>(data[p + 2]);
-    size_t arg1 = data[p + 3];
-    size_t arg2 = data[p + 4];
-    size_t arg3 = data[p + 5];
-    buf.append(std::to_string(src))
-    .append("->")
-    .append(std::to_string(trg))
-    .append(" ")
-    .append(transitionTypeName(ttype))
-    .append(" ")
-    .append(std::to_string(arg1))
-    .append(",")
-    .append(std::to_string(arg2))
-    .append(",")
-    .append(std::to_string(arg3))
-    .append("\n");
-    p += 6;
-  }
-  size_t ndecisions = data[p++];
-  for (size_t i = 0; i < ndecisions; i++) {
-    size_t s = data[p++];
-    buf += std::to_string(i) + ":" + std::to_string(s) + "\n";
-  }
-
-  if (atn->grammarType == ATNType::LEXER) {
-    //int lexerActionCount = data[p++];
-
-    //p += lexerActionCount * 3; // Instead of useless loop below.
-    /*
-    for (int i = 0; i < lexerActionCount; i++) {
-      LexerActionType actionType = (LexerActionType)data[p++];
-      int data1 = data[p++];
-      int data2 = data[p++];
-    }
-     */
-  }
-
-  return buf;
-}
-
-std::string ATNSerializer::getTokenName(size_t t) {
-  if (t == Token::EOF) {
-    return "EOF";
-  }
-
-  if (atn->grammarType == ATNType::LEXER && t <= 0x10FFFF) {
-    switch (t) {
-      case '\n':
-        return "'\\n'";
-      case '\r':
-        return "'\\r'";
-      case '\t':
-        return "'\\t'";
-      case '\b':
-        return "'\\b'";
-      case '\f':
-        return "'\\f'";
-      case '\\':
-        return "'\\\\'";
-      case '\'':
-        return "'\\''";
-      default:
-        std::string s_hex = antlrcpp::toHexString((int)t);
-        if (s_hex >= "0" && s_hex <= "7F" && !iscntrl((int)t)) {
-          return "'" + std::to_string(t) + "'";
-        }
-
-        // turn on the bit above max "\u10FFFF" value so that we pad with zeros
-        // then only take last 6 digits
-        std::string hex = antlrcpp::toHexString((int)t | 0x1000000).substr(1, 6);
-        std::string unicodeStr = std::string("'\\u") + hex + std::string("'");
-        return unicodeStr;
-    }
-  }
-
-  if (_tokenNames.size() > 0 && t < _tokenNames.size()) {
-    return _tokenNames[t];
-  }
-
-  return std::to_string(t);
-}
-
-std::wstring ATNSerializer::getSerializedAsString(ATN *atn) {
-  std::vector<size_t> data = getSerialized(atn);
-  std::wstring result;
-  for (size_t entry : data)
-    result.push_back((wchar_t)entry);
-
-  return result;
-}
-
-std::vector<size_t> ATNSerializer::getSerialized(ATN *atn) {
-  return ATNSerializer(atn).serialize();
-}
-
-std::string ATNSerializer::getDecoded(ATN *atn, std::vector<std::string> &tokenNames) {
-  std::wstring serialized = getSerializedAsString(atn);
-  return ATNSerializer(atn, tokenNames).decode(serialized);
-}
diff --git a/runtime/Cpp/runtime/src/atn/ATNSerializer.h b/runtime/Cpp/runtime/src/atn/ATNSerializer.h
deleted file mode 100755
index 8b77894fa6..0000000000
--- a/runtime/Cpp/runtime/src/atn/ATNSerializer.h
+++ /dev/null
@@ -1,61 +0,0 @@
-﻿/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
- * Use of this file is governed by the BSD 3-clause license that
- * can be found in the LICENSE.txt file in the project root.
- */
-
-#pragma once
-
-#include "antlr4-common.h"
-
-namespace antlr4 {
-namespace atn {
-
-  class ANTLR4CPP_PUBLIC ATNSerializer {
-  public:
-    ATN *atn;
-
-    ATNSerializer(ATN *atn);
-    ATNSerializer(ATN *atn, const std::vector<std::string> &tokenNames);
-    virtual ~ATNSerializer();
-
-    /// <summary>
-    /// Serialize state descriptors, edge descriptors, and decision->state map
-    ///  into list of ints:
-    ///
-    /// 		grammar-type, (ANTLRParser.LEXER, ...)
-    ///  	max token type,
-    ///  	num states,
-    ///  	state-0-type ruleIndex, state-1-type ruleIndex, ... state-i-type
-    ///  ruleIndex optional-arg ...
-    ///  	num rules,
-    ///  	rule-1-start-state rule-1-args, rule-2-start-state  rule-2-args, ...
-    ///  	(args are token type,actionIndex in lexer else 0,0)
-    ///      num modes,
-    ///      mode-0-start-state, mode-1-start-state, ... (parser has 0 modes)
-    ///      num sets
-    ///      set-0-interval-count intervals, set-1-interval-count intervals, ...
-    ///  	num total edges,
-    ///      src, trg, edge-type, edge arg1, optional edge arg2 (present always),
-    ///      ...
-    ///      num decisions,
-    ///      decision-0-start-state, decision-1-start-state, ...
-    ///
-    ///  Convenient to pack into unsigned shorts to make as Java string.
-    /// </summary>
-    virtual std::vector<size_t> serialize();
-
-    virtual std::string decode(const std::wstring& data);
-    virtual std::string getTokenName(size_t t);
-
-    /// Used by Java target to encode short/int array as chars in string.
-    static std::wstring getSerializedAsString(ATN *atn);
-    static std::vector<size_t> getSerialized(ATN *atn);
-
-    static std::string getDecoded(ATN *atn, std::vector<std::string> &tokenNames);
-
-  private:
-    std::vector<std::string> _tokenNames;
-  };
-
-} // namespace atn
-} // namespace antlr4
diff --git a/runtime/Cpp/runtime/src/misc/InterpreterDataReader.cpp b/runtime/Cpp/runtime/src/misc/InterpreterDataReader.cpp
index c77b8bca2b..0bcaf07b54 100755
--- a/runtime/Cpp/runtime/src/misc/InterpreterDataReader.cpp
+++ b/runtime/Cpp/runtime/src/misc/InterpreterDataReader.cpp
@@ -101,7 +101,7 @@ InterpreterData InterpreterDataReader::parseFile(std::string const& fileName) {
     };
   }
 
-  std::vector<uint16_t> serializedATN;
+  std::vector<int32_t> serializedATN;
 
   std::getline(input, line, '\n');
   assert(line == "atn:");
@@ -115,7 +115,7 @@ InterpreterData InterpreterDataReader::parseFile(std::string const& fileName) {
       number = std::strtoul(&value[1], nullptr, 10);
     else
       number = std::strtoul(value.c_str(), nullptr, 10);
-    serializedATN.push_back(static_cast<uint16_t>(number));
+    serializedATN.push_back(static_cast<int32_t>(number));
   }
 
   ATNDeserializer deserializer;
diff --git a/runtime/Cpp/runtime/src/tree/xpath/XPathLexer.cpp b/runtime/Cpp/runtime/src/tree/xpath/XPathLexer.cpp
index 5c2e8568a1..b648f6c085 100644
--- a/runtime/Cpp/runtime/src/tree/xpath/XPathLexer.cpp
+++ b/runtime/Cpp/runtime/src/tree/xpath/XPathLexer.cpp
@@ -33,7 +33,7 @@ struct XPathLexerStaticData final {
   const std::vector<std::string> literalNames;
   const std::vector<std::string> symbolicNames;
   const antlr4::dfa::Vocabulary vocabulary;
-  std::vector<uint16_t> serializedATN;
+  std::vector<int32_t> serializedATN;
   std::unique_ptr<antlr4::atn::ATN> atn;
 };
 
@@ -61,8 +61,8 @@ void xpathLexerInitialize() {
       "STRING"
     }
   );
-  static const uint16_t serializedATNSegment0[] = {
-    0x4, 0x0, 0x8, 0x32, 0x6, 0xffff, 0x2, 0x0, 0x7, 0x0, 0x2, 0x1, 0x7,
+  static const int32_t serializedATNSegment0[] = {
+    0x4, 0x0, 0x8, 0x32, 0x6, -1, 0x2, 0x0, 0x7, 0x0, 0x2, 0x1, 0x7,
        0x1, 0x2, 0x2, 0x7, 0x2, 0x2, 0x3, 0x7, 0x3, 0x2, 0x4, 0x7, 0x4,
        0x2, 0x5, 0x7, 0x5, 0x2, 0x6, 0x7, 0x6, 0x2, 0x7, 0x7, 0x7, 0x1,
        0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1, 0x1, 0x2, 0x1, 0x2,
@@ -75,7 +75,7 @@ void xpathLexerInitialize() {
        0x0, 0x2, 0x5, 0x0, 0x30, 0x39, 0x5f, 0x5f, 0xb7, 0xb7, 0x300, 0x36f,
        0x203f, 0x2040, 0xd, 0x0, 0x41, 0x5a, 0x61, 0x7a, 0xc0, 0xd6, 0xd8,
        0xf6, 0xf8, 0x2ff, 0x370, 0x37d, 0x37f, 0x1fff, 0x200c, 0x200d, 0x2070,
-       0x218f, 0x2c00, 0x2fef, 0x3001, 0xd7ff, 0xf900, 0xfdcf, 0xfdf0, 0xffff,
+       0x218f, 0x2c00, 0x2fef, 0x3001, 0xd7ff, 0xf900, 0xfdcf, 0xfdf0, -1,
        0x0, 0x32, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x3, 0x1, 0x0, 0x0,
        0x0, 0x0, 0x5, 0x1, 0x0, 0x0, 0x0, 0x0, 0x7, 0x1, 0x0, 0x0, 0x0,
        0x0, 0x9, 0x1, 0x0, 0x0, 0x0, 0x0, 0xf, 0x1, 0x0, 0x0, 0x0, 0x1,
@@ -151,7 +151,7 @@ const dfa::Vocabulary& XPathLexer::getVocabulary() const {
   return xpathLexerStaticData->vocabulary;
 }
 
-const std::vector<uint16_t>& XPathLexer::getSerializedATN() const {
+const std::vector<int32_t>& XPathLexer::getSerializedATN() const {
   return xpathLexerStaticData->serializedATN;
 }
 
diff --git a/runtime/Cpp/runtime/src/tree/xpath/XPathLexer.h b/runtime/Cpp/runtime/src/tree/xpath/XPathLexer.h
index e35f15584d..bd6711077e 100644
--- a/runtime/Cpp/runtime/src/tree/xpath/XPathLexer.h
+++ b/runtime/Cpp/runtime/src/tree/xpath/XPathLexer.h
@@ -28,7 +28,7 @@ class  XPathLexer : public antlr4::Lexer {
 
   virtual const antlr4::dfa::Vocabulary& getVocabulary() const override;
 
-  virtual const std::vector<uint16_t>& getSerializedATN() const override;
+  virtual const std::vector<int32_t>& getSerializedATN() const override;
 
   virtual const antlr4::atn::ATN& getATN() const override;
 
diff --git a/runtime/Dart/lib/src/atn/src/atn_deserializer.dart b/runtime/Dart/lib/src/atn/src/atn_deserializer.dart
index a26abd1dd8..ee6928be10 100644
--- a/runtime/Dart/lib/src/atn/src/atn_deserializer.dart
+++ b/runtime/Dart/lib/src/atn/src/atn_deserializer.dart
@@ -86,10 +86,7 @@ class ATNDeserializer {
     readRules(atn);
     readModes(atn);
     final sets = <IntervalSet>[];
-    // First, deserialize sets with 16-bit arguments <= U+FFFF.
-    readSets(atn, sets, () => readInt());
-    // Next, deserialize sets with 32-bit arguments <= U+10FFFF.
-    readSets(atn, sets, () => readInt32());
+    readSets(atn, sets);
     readEdges(atn, sets);
     readDecisions(atn);
     readLexerActions(atn);
@@ -130,9 +127,6 @@ class ATNDeserializer {
       }
 
       var ruleIndex = readInt();
-      if (ruleIndex == 0xFFFF) {
-        ruleIndex = -1;
-      }
 
       final s = stateFactory(stype, ruleIndex);
       if (s is LoopEndState) {
@@ -180,9 +174,6 @@ class ATNDeserializer {
       atn.ruleToStartState.add(startState);
       if (atn.grammarType == ATNType.LEXER) {
         var tokenType = readInt();
-        if (tokenType == 0xFFFF) {
-          tokenType = Token.EOF;
-        }
 
         atn.ruleToTokenType.add(tokenType);
       }
@@ -208,7 +199,7 @@ class ATNDeserializer {
     }
   }
 
-  void readSets(ATN atn, List<IntervalSet> sets, readUnicode) {
+  void readSets(ATN atn, List<IntervalSet> sets) {
     final nsets = readInt();
     for (var i = 0; i < nsets; i++) {
       final nintervals = readInt();
@@ -221,8 +212,8 @@ class ATNDeserializer {
       }
 
       for (var j = 0; j < nintervals; j++) {
-        int a = readUnicode();
-        int b = readUnicode();
+        int a = readInt();
+        int b = readInt();
         set.addRange(a, b);
       }
     }
@@ -321,14 +312,7 @@ class ATNDeserializer {
       atn.lexerActions = List<LexerAction>.generate(readInt(), (index) {
         final actionType = LexerActionType.values[readInt()];
         var data1 = readInt();
-        if (data1 == 0xFFFF) {
-          data1 = -1;
-        }
-
         var data2 = readInt();
-        if (data2 == 0xFFFF) {
-          data2 = -1;
-        }
         final lexerAction = lexerActionFactory(actionType, data1, data2);
 
         return lexerAction;
@@ -537,12 +521,6 @@ class ATNDeserializer {
     return data[pos++];
   }
 
-  int readInt32() {
-    final low = readInt();
-    final high = readInt();
-    return low | (high << 16);
-  }
-
   Transition edgeFactory(
     ATN atn,
     TransitionType type,
diff --git a/runtime/Go/antlr/atn_deserializer.go b/runtime/Go/antlr/atn_deserializer.go
index 1b042dcb10..aea9bbfa93 100644
--- a/runtime/Go/antlr/atn_deserializer.go
+++ b/runtime/Go/antlr/atn_deserializer.go
@@ -23,7 +23,7 @@ type blockStartStateIntPair struct {
 
 type ATNDeserializer struct {
 	options *ATNDeserializationOptions
-	data    []uint16
+	data    []int32
 	pos     int
 }
 
@@ -45,7 +45,7 @@ func stringInSlice(a string, list []string) int {
 	return -1
 }
 
-func (a *ATNDeserializer) DeserializeFromUInt16(data []uint16) *ATN {
+func (a *ATNDeserializer) Deserialize(data []int32) *ATN {
 	a.data = data
 	a.pos = 0
 	a.checkVersion()
@@ -56,10 +56,7 @@ func (a *ATNDeserializer) DeserializeFromUInt16(data []uint16) *ATN {
 	a.readRules(atn)
 	a.readModes(atn)
 
-	// First, deserialize sets with 16-bit arguments <= U+FFFF.
-	sets := a.readSets(atn, nil, a.readInt)
-	// Next, deserialize sets with 32-bit arguments <= U+10FFFF.
-	sets = a.readSets(atn, sets, a.readInt32)
+	sets := a.readSets(atn, nil)
 
 	a.readEdges(atn, sets)
 	a.readDecisions(atn)
@@ -113,10 +110,6 @@ func (a *ATNDeserializer) readStates(atn *ATN) {
 
 		ruleIndex := a.readInt()
 
-		if ruleIndex == 0xFFFF {
-			ruleIndex = -1
-		}
-
 		s := a.stateFactory(stype, ruleIndex)
 
 		if stype == ATNStateLoopEnd {
@@ -175,10 +168,6 @@ func (a *ATNDeserializer) readRules(atn *ATN) {
 		if atn.grammarType == ATNTypeLexer {
 			tokenType := a.readInt()
 
-			if tokenType == 0xFFFF {
-				tokenType = TokenEOF
-			}
-
 			atn.ruleToTokenType[i] = tokenType
 		}
 	}
@@ -204,7 +193,7 @@ func (a *ATNDeserializer) readModes(atn *ATN) {
 	}
 }
 
-func (a *ATNDeserializer) readSets(atn *ATN, sets []*IntervalSet, readUnicode func() int) []*IntervalSet {
+func (a *ATNDeserializer) readSets(atn *ATN, sets []*IntervalSet) []*IntervalSet {
 	m := a.readInt()
 
 	// Preallocate the needed capacity.
@@ -227,8 +216,8 @@ func (a *ATNDeserializer) readSets(atn *ATN, sets []*IntervalSet, readUnicode fu
 		}
 
 		for j := 0; j < n; j++ {
-			i1 := readUnicode()
-			i2 := readUnicode()
+			i1 := a.readInt()
+			i2 := a.readInt()
 
 			iset.addRange(i1, i2)
 		}
@@ -330,17 +319,7 @@ func (a *ATNDeserializer) readLexerActions(atn *ATN) {
 		for i := range atn.lexerActions {
 			actionType := a.readInt()
 			data1 := a.readInt()
-
-			if data1 == 0xFFFF {
-				data1 = -1
-			}
-
 			data2 := a.readInt()
-
-			if data2 == 0xFFFF {
-				data2 = -1
-			}
-
 			atn.lexerActions[i] = a.lexerActionFactory(actionType, data1, data2)
 		}
 	}
@@ -571,13 +550,7 @@ func (a *ATNDeserializer) readInt() int {
 
 	a.pos++
 
-	return int(v)
-}
-
-func (a *ATNDeserializer) readInt32() int {
-	var low = a.readInt()
-	var high = a.readInt()
-	return low | (high << 16)
+	return int(v) // data is 32 bits but int is at least that big
 }
 
 func (a *ATNDeserializer) edgeFactory(atn *ATN, typeIndex, src, trg, arg1, arg2, arg3 int, sets []*IntervalSet) Transition {
diff --git a/runtime/Go/antlr/testing_lexer_b_test.go b/runtime/Go/antlr/testing_lexer_b_test.go
index b5691b574c..d07782b17f 100644
--- a/runtime/Go/antlr/testing_lexer_b_test.go
+++ b/runtime/Go/antlr/testing_lexer_b_test.go
@@ -20,7 +20,7 @@ MULT : '*';
 WS : ' '+;
 */
 
-var lexerB_serializedLexerAtn = []uint16{
+var lexerB_serializedLexerAtn = []int32{
 	4, 0, 7, 38, 6, 65535, 2, 0, 7, 0, 2, 1, 7, 1, 2, 2, 7, 2, 2, 3, 7, 3,
 	2, 4, 7, 4, 2, 5, 7, 5, 2, 6, 7, 6, 1, 0, 4, 0, 17, 8, 0, 11, 0, 12, 0,
 	18, 1, 1, 4, 1, 22, 8, 1, 11, 1, 12, 1, 23, 1, 2, 1, 2, 1, 3, 1, 3, 1,
@@ -40,7 +40,7 @@ var lexerB_serializedLexerAtn = []uint16{
 }
 
 var lexerB_lexerDeserializer = NewATNDeserializer(nil)
-var lexerB_lexerAtn = lexerB_lexerDeserializer.DeserializeFromUInt16(lexerB_serializedLexerAtn)
+var lexerB_lexerAtn = lexerB_lexerDeserializer.Deserialize(lexerB_serializedLexerAtn)
 
 var lexerB_lexerChannelNames = []string{
 	"DEFAULT_TOKEN_CHANNEL", "HIDDEN",
diff --git a/runtime/Go/antlr/tokenstream_rewriter_test.go b/runtime/Go/antlr/tokenstream_rewriter_test.go
index 852f50232c..b3647ed318 100644
--- a/runtime/Go/antlr/tokenstream_rewriter_test.go
+++ b/runtime/Go/antlr/tokenstream_rewriter_test.go
@@ -328,7 +328,7 @@ func TestLexerA(t *testing.T){
 var _ = fmt.Printf
 var _ = unicode.IsLetter
 
-var serializedLexerAtn = []uint16{
+var serializedLexerAtn = []int32{
 	4, 0, 3, 13, 6, 65535, 2, 0, 7, 0, 2, 1, 7, 1, 2, 2, 7, 2, 1, 0, 1, 0,
 	1, 1, 1, 1, 1, 2, 1, 2, 0, 0, 3, 1, 1, 3, 2, 5, 3, 1, 0, 0, 0, 12, 0, 1,
 	1, 0, 0, 0, 0, 3, 1, 0, 0, 0, 0, 5, 1, 0, 0, 0, 1, 7, 1, 0, 0, 0, 3, 9,
@@ -338,7 +338,7 @@ var serializedLexerAtn = []uint16{
 }
 
 var lexerDeserializer = NewATNDeserializer(nil)
-var lexerAtn = lexerDeserializer.DeserializeFromUInt16(serializedLexerAtn)
+var lexerAtn = lexerDeserializer.Deserialize(serializedLexerAtn)
 
 var lexerChannelNames = []string{
 	"DEFAULT_TOKEN_CHANNEL", "HIDDEN",
diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNDeserializer.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNDeserializer.java
index 0bd643a131..0ec2a9359e 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNDeserializer.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNDeserializer.java
@@ -7,15 +7,20 @@
 package org.antlr.v4.runtime.atn;
 
 import org.antlr.v4.runtime.Token;
+import org.antlr.v4.runtime.misc.IntegerList;
 import org.antlr.v4.runtime.misc.IntervalSet;
 import org.antlr.v4.runtime.misc.Pair;
 
 import java.io.InvalidClassException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Locale;
 
-/**
+/** Deserialize ATNs for JavaTarget; it's complicated by the fact that java requires
+ *  that we serialize the list of integers as 16 bit characters in a string. Other
+ *  targets will have an array of ints generated and can simply decode the ints
+ *  back into an ATN.
  *
  * @author Sam Harwell
  */
@@ -25,49 +30,6 @@ public class ATNDeserializer {
 		SERIALIZED_VERSION = 4;
 	}
 
-	interface UnicodeDeserializer {
-		// Wrapper for readInt() or readInt32()
-		int readUnicode(char[] data, int p);
-
-		// Work around Java not allowing mutation of captured variables
-		// by returning amount by which to increment p after each read
-		int size();
-	}
-
-	enum UnicodeDeserializingMode {
-		UNICODE_BMP,
-		UNICODE_SMP
-	}
-
-	static UnicodeDeserializer getUnicodeDeserializer(UnicodeDeserializingMode mode) {
-		if (mode == UnicodeDeserializingMode.UNICODE_BMP) {
-			return new UnicodeDeserializer() {
-				@Override
-				public int readUnicode(char[] data, int p) {
-					return toInt(data[p]);
-				}
-
-				@Override
-				public int size() {
-					return 1;
-				}
-			};
-		}
-		else {
-			return new UnicodeDeserializer() {
-				@Override
-				public int readUnicode(char[] data, int p) {
-					return toInt32(data, p);
-				}
-
-				@Override
-				public int size() {
-					return 2;
-				}
-			};
-		}
-	}
-
 	private final ATNDeserializationOptions deserializationOptions;
 
 	public ATNDeserializer() {
@@ -83,20 +45,19 @@ public ATNDeserializer(ATNDeserializationOptions deserializationOptions) {
 	}
 
 	public ATN deserialize(char[] data) {
-		data = data.clone();
-		for (int i = 1; i < data.length; i++) {
-			data[i] = (char) (data[i] - 2);
-		}
+		return deserialize(decodeIntsEncodedAs16BitWords(data));
+	}
 
+	public ATN deserialize(int[] data) {
 		int p = 0;
-		int version = toInt(data[p++]);
+		int version = data[p++];
 		if (version != SERIALIZED_VERSION) {
 			String reason = String.format(Locale.getDefault(), "Could not deserialize ATN with version %d (expected %d).", version, SERIALIZED_VERSION);
 			throw new UnsupportedOperationException(new InvalidClassException(ATN.class.getName(), reason));
 		}
 
-		ATNType grammarType = ATNType.values()[toInt(data[p++])];
-		int maxTokenType = toInt(data[p++]);
+		ATNType grammarType = ATNType.values()[data[p++]];
+		int maxTokenType = data[p++];
 		ATN atn = new ATN(grammarType, maxTokenType);
 
 		//
@@ -104,27 +65,23 @@ public ATN deserialize(char[] data) {
 		//
 		List<Pair<LoopEndState, Integer>> loopBackStateNumbers = new ArrayList<Pair<LoopEndState, Integer>>();
 		List<Pair<BlockStartState, Integer>> endStateNumbers = new ArrayList<Pair<BlockStartState, Integer>>();
-		int nstates = toInt(data[p++]);
+		int nstates = data[p++];
 		for (int i=0; i<nstates; i++) {
-			int stype = toInt(data[p++]);
+			int stype = data[p++];
 			// ignore bad type of states
 			if ( stype==ATNState.INVALID_TYPE ) {
 				atn.addState(null);
 				continue;
 			}
 
-			int ruleIndex = toInt(data[p++]);
-			if (ruleIndex == Character.MAX_VALUE) {
-				ruleIndex = -1;
-			}
-
+			int ruleIndex = data[p++];
 			ATNState s = stateFactory(stype, ruleIndex);
 			if ( stype == ATNState.LOOP_END ) { // special case
-				int loopBackStateNumber = toInt(data[p++]);
+				int loopBackStateNumber = data[p++];
 				loopBackStateNumbers.add(new Pair<LoopEndState, Integer>((LoopEndState)s, loopBackStateNumber));
 			}
 			else if (s instanceof BlockStartState) {
-				int endStateNumber = toInt(data[p++]);
+				int endStateNumber = data[p++];
 				endStateNumbers.add(new Pair<BlockStartState, Integer>((BlockStartState)s, endStateNumber));
 			}
 			atn.addState(s);
@@ -139,37 +96,33 @@ else if (s instanceof BlockStartState) {
 			pair.a.endState = (BlockEndState)atn.states.get(pair.b);
 		}
 
-		int numNonGreedyStates = toInt(data[p++]);
+		int numNonGreedyStates = data[p++];
 		for (int i = 0; i < numNonGreedyStates; i++) {
-			int stateNumber = toInt(data[p++]);
+			int stateNumber = data[p++];
 			((DecisionState)atn.states.get(stateNumber)).nonGreedy = true;
 		}
 
-		int numPrecedenceStates = toInt(data[p++]);
+		int numPrecedenceStates = data[p++];
 		for (int i = 0; i < numPrecedenceStates; i++) {
-			int stateNumber = toInt(data[p++]);
+			int stateNumber = data[p++];
 			((RuleStartState)atn.states.get(stateNumber)).isLeftRecursiveRule = true;
 		}
 
 		//
 		// RULES
 		//
-		int nrules = toInt(data[p++]);
+		int nrules = data[p++];
 		if ( atn.grammarType == ATNType.LEXER ) {
 			atn.ruleToTokenType = new int[nrules];
 		}
 
 		atn.ruleToStartState = new RuleStartState[nrules];
 		for (int i=0; i<nrules; i++) {
-			int s = toInt(data[p++]);
+			int s = data[p++];
 			RuleStartState startState = (RuleStartState)atn.states.get(s);
 			atn.ruleToStartState[i] = startState;
 			if ( atn.grammarType == ATNType.LEXER ) {
-				int tokenType = toInt(data[p++]);
-				if (tokenType == 0xFFFF) {
-					tokenType = Token.EOF;
-				}
-
+				int tokenType = data[p++];
 				atn.ruleToTokenType[i] = tokenType;
 			}
 		}
@@ -188,9 +141,9 @@ else if (s instanceof BlockStartState) {
 		//
 		// MODES
 		//
-		int nmodes = toInt(data[p++]);
+		int nmodes = data[p++];
 		for (int i=0; i<nmodes; i++) {
-			int s = toInt(data[p++]);
+			int s = data[p++];
 			atn.modeToStartState.add((TokensStartState)atn.states.get(s));
 		}
 
@@ -198,24 +151,19 @@ else if (s instanceof BlockStartState) {
 		// SETS
 		//
 		List<IntervalSet> sets = new ArrayList<IntervalSet>();
-
-		// First, read all sets with 16-bit Unicode code points <= U+FFFF.
-		p = deserializeSets(data, p, sets, getUnicodeDeserializer(UnicodeDeserializingMode.UNICODE_BMP));
-
-		// Next, deserialize sets with 32-bit arguments <= U+10FFFF.
-		p = deserializeSets(data, p, sets, getUnicodeDeserializer(UnicodeDeserializingMode.UNICODE_SMP));
+		p = deserializeSets(data, p, sets);
 
 		//
 		// EDGES
 		//
-		int nedges = toInt(data[p++]);
+		int nedges = data[p++];
 		for (int i=0; i<nedges; i++) {
-			int src = toInt(data[p]);
-			int trg = toInt(data[p+1]);
-			int ttype = toInt(data[p+2]);
-			int arg1 = toInt(data[p+3]);
-			int arg2 = toInt(data[p+4]);
-			int arg3 = toInt(data[p+5]);
+			int src = data[p];
+			int trg = data[p+1];
+			int ttype = data[p+2];
+			int arg1 = data[p+3];
+			int arg2 = data[p+4];
+			int arg3 = data[p+5];
 			Transition trans = edgeFactory(atn, ttype, src, trg, arg1, arg2, arg3, sets);
 //			System.out.println("EDGE "+trans.getClass().getSimpleName()+" "+
 //							   src+"->"+trg+
@@ -285,9 +233,9 @@ else if (state instanceof StarLoopbackState) {
 		//
 		// DECISIONS
 		//
-		int ndecisions = toInt(data[p++]);
+		int ndecisions = data[p++];
 		for (int i=1; i<=ndecisions; i++) {
-			int s = toInt(data[p++]);
+			int s = data[p++];
 			DecisionState decState = (DecisionState)atn.states.get(s);
 			atn.decisionToState.add(decState);
 			decState.decision = i-1;
@@ -297,18 +245,11 @@ else if (state instanceof StarLoopbackState) {
 		// LEXER ACTIONS
 		//
 		if (atn.grammarType == ATNType.LEXER) {
-			atn.lexerActions = new LexerAction[toInt(data[p++])];
+			atn.lexerActions = new LexerAction[data[p++]];
 			for (int i = 0; i < atn.lexerActions.length; i++) {
-				LexerActionType actionType = LexerActionType.values()[toInt(data[p++])];
-				int data1 = toInt(data[p++]);
-				if (data1 == 0xFFFF) {
-					data1 = -1;
-				}
-
-				int data2 = toInt(data[p++]);
-				if (data2 == 0xFFFF) {
-					data2 = -1;
-				}
+				LexerActionType actionType = LexerActionType.values()[data[p++]];
+				int data1 = data[p++];
+				int data2 = data[p++];
 
 				LexerAction lexerAction = lexerActionFactory(actionType, data1, data2);
 
@@ -415,24 +356,22 @@ else if (state instanceof StarLoopbackState) {
 		return atn;
 	}
 
-	private int deserializeSets(char[] data, int p, List<IntervalSet> sets, UnicodeDeserializer unicodeDeserializer) {
-		int nsets = toInt(data[p++]);
+	private int deserializeSets(int[] data, int p, List<IntervalSet> sets) {
+		int nsets = data[p++];
 		for (int i=0; i<nsets; i++) {
-			int nintervals = toInt(data[p]);
+			int nintervals = data[p];
 			p++;
 			IntervalSet set = new IntervalSet();
 			sets.add(set);
 
-			boolean containsEof = toInt(data[p++]) != 0;
+			boolean containsEof = data[p++] != 0;
 			if (containsEof) {
 				set.add(-1);
 			}
 
 			for (int j=0; j<nintervals; j++) {
-				int a = unicodeDeserializer.readUnicode(data, p);
-				p += unicodeDeserializer.size();
-				int b = unicodeDeserializer.readUnicode(data, p);
-				p += unicodeDeserializer.size();
+				int a = data[p++];
+				int b = data[p++];
 				set.add(a, b);
 			}
 		}
@@ -547,6 +486,10 @@ protected static int toInt32(char[] data, int offset) {
 		return (int)data[offset] | ((int)data[offset + 1] << 16);
 	}
 
+	protected static int toInt32(int[] data, int offset) {
+		return data[offset] | (data[offset + 1] << 16);
+	}
+
 	protected Transition edgeFactory(ATN atn,
 										 int type, int src, int trg,
 										 int arg1, int arg2, int arg3,
@@ -643,4 +586,76 @@ protected LexerAction lexerActionFactory(LexerActionType type, int data1, int da
 			throw new IllegalArgumentException(String.format(Locale.getDefault(), "The specified lexer action type %s is not valid.", type));
 		}
 	}
+
+	/** Given a list of integers representing a serialized ATN, encode values too large to fit into 15 bits
+	 *  as two 16bit values. We use the high bit (0x8000_0000) to indicate values requiring two 16 bit words.
+	 *  If the high bit is set, we grab the next value and combine them to get a 31-bit value. The possible
+	 *  input int values are [-1,0x7FFF_FFFE]; 0x7FFF_FFFF is rejected because it would encode as 0xFFFF 0xFFFF, same as -1.
+	 *
+	 * 		| compression/encoding                         | uint16 count | type            |
+	 * 		| -------------------------------------------- | ------------ | --------------- |
+	 * 		| 0xxxxxxx xxxxxxxx                            | 1            | uint (15 bit)   |
+	 * 		| 1xxxxxxx xxxxxxxx yyyyyyyy yyyyyyyy          | 2            | uint (16+ bits) |
+	 * 		| 11111111 11111111 11111111 11111111          | 2            | int value -1    |
+	 *
+	 * 	This is only used (other than for testing) by {@link org.antlr.v4.codegen.model.SerializedJavaATN}
+	 * 	to encode ints as char values for the java target, but it is convenient to combine it with the
+	 * 	#decodeIntsEncodedAs16BitWords that follows as they are a pair (I did not want to introduce a new class
+	 * 	into the runtime). Used only for Java Target.
+	 */
+	public static IntegerList encodeIntsWith16BitWords(IntegerList data) {
+		IntegerList data16 = new IntegerList((int)(data.size()*1.5));
+		for (int i = 0; i < data.size(); i++) {
+			int v = data.get(i);
+			if ( v==-1 ) { // use two max uint16 for -1
+				data16.add(0xFFFF);
+				data16.add(0xFFFF);
+			}
+			else if (v <= 0x7FFF) {
+				data16.add(v);
+			}
+			else { // v > 0x7FFF
+				if ( v>=0x7FFF_FFFF ) { // too big to fit in 15 bits + 16 bits? (+1 would be 8000_0000 which is bad encoding)
+					throw new UnsupportedOperationException("Serialized ATN data element["+i+"] = "+v+" doesn't fit in 31 bits");
+				}
+				v = v & 0x7FFF_FFFF;					// strip high bit (sentinel) if set
+				data16.add((v >> 16) | 0x8000);   // store high 15-bit word first and set high bit to say word follows
+				data16.add((v & 0xFFFF)); 		// then store lower 16-bit word
+			}
+		}
+		return data16;
+	}
+
+	public static int[] decodeIntsEncodedAs16BitWords(char[] data16) {
+		return decodeIntsEncodedAs16BitWords(data16, false);
+	}
+
+	/** Decode a list of chars (uint16 values) that represent a serialized and compressed list of ints for an ATN back into ints.
+	 *  This method pairs with {@link #encodeIntsWith16BitWords(IntegerList)} above. Used only for Java Target.
+	 */
+	public static int[] decodeIntsEncodedAs16BitWords(char[] data16, boolean trimToSize) {
+		// never longer than data16; we waste a bit of space (trailing zeros unless trimToSize) to avoid copying during initialization of parsers
+		int[] data = new int[data16.length];
+		int i = 0;
+		int i2 = 0;
+		while ( i < data16.length ) {
+			char v = data16[i++];
+			if ( (v & 0x8000) == 0 ) { // hi bit not set? Implies 1-word value
+				data[i2++] = v; // 15 bit int
+			}
+			else { // hi bit set. Implies 2-word value
+				char vnext = data16[i++];
+				if ( v==0xFFFF && vnext == 0xFFFF ) { // is it -1?
+					data[i2++] = -1;
+				}
+				else { // 31-bit int
+					data[i2++] = (v & 0x7FFF) << 16 | (vnext & 0xFFFF);
+				}
+			}
+		}
+		if ( trimToSize ) {
+			return Arrays.copyOf(data, i2);
+		}
+		return data;
+	}
 }
diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSerializer.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSerializer.java
index e201ce117d..6a9515eb5d 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSerializer.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSerializer.java
@@ -10,38 +10,37 @@
 import org.antlr.v4.runtime.misc.IntegerList;
 import org.antlr.v4.runtime.misc.Interval;
 import org.antlr.v4.runtime.misc.IntervalSet;
-import org.antlr.v4.runtime.misc.Utils;
 
-import java.io.InvalidClassException;
-import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
-import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 
+/** This class represents a target neutral serializer for ATNs. An ATN is converted to a list of integers
+ *  that can be converted back to an ATN. We compute the list of integers and then generate an array
+ *  into the target language for a particular lexer or parser.  Java is a special case where we must
+ *  generate strings instead of arrays, but that is handled outside of this class.
+ *  See {@link ATNDeserializer#encodeIntsWith16BitWords(IntegerList)} and
+ *  {@link org.antlr.v4.codegen.model.SerializedJavaATN}.
+ */
 public class ATNSerializer {
 	public ATN atn;
-	private List<String> tokenNames;
 
-	private interface CodePointSerializer {
-		void serializeCodePoint(IntegerList data, int cp);
-	}
+	private final IntegerList data = new IntegerList();
+	/** Note that we use a LinkedHashMap as a set to maintain insertion order while deduplicating
+	    entries with the same key. */
+	private final Map<IntervalSet, Boolean> sets = new LinkedHashMap<>();
+	private final IntegerList nonGreedyStates = new IntegerList();
+	private final IntegerList precedenceStates = new IntegerList();
 
 	public ATNSerializer(ATN atn) {
 		assert atn.grammarType != null;
 		this.atn = atn;
 	}
 
-	public ATNSerializer(ATN atn, List<String> tokenNames) {
-		assert atn.grammarType != null;
-		this.atn = atn;
-		this.tokenNames = tokenNames;
-	}
-
 	/** Serialize state descriptors, edge descriptors, and decision&rarr;state map
-	 *  into list of ints:
+	 *  into list of ints.  Likely out of date, but keeping as it could be helpful:
 	 *
 	 *      SERIALIZED_VERSION
 	 *      UUID (2 longs)
@@ -65,141 +64,99 @@ public ATNSerializer(ATN atn, List<String> tokenNames) {
 	 *
 	 *  Convenient to pack into unsigned shorts to make as Java string.
 	 */
-	public IntegerList serialize(String language) {
-		IntegerList data = new IntegerList();
+	public IntegerList serialize() {
+		addPreamble();
+		int nedges = addEdges();
+		addNonGreedyStates();
+		addPrecedenceStates();
+		addRuleStatesAndLexerTokenTypes();
+		addModeStartStates();
+		Map<IntervalSet, Integer> setIndices = null;
+		setIndices = addSets();
+		addEdges(nedges, setIndices);
+		addDecisionStartStates();
+		addLexerActions();
+
+		return data;
+	}
+
+	private void addPreamble() {
 		data.add(ATNDeserializer.SERIALIZED_VERSION);
 
 		// convert grammar type to ATN const to avoid dependence on ANTLRParser
 		data.add(atn.grammarType.ordinal());
 		data.add(atn.maxTokenType);
-		int nedges = 0;
-
-		// Note that we use a LinkedHashMap as a set to
-		// maintain insertion order while deduplicating
-		// entries with the same key.
-		Map<IntervalSet, Boolean> sets = new LinkedHashMap<>();
-
-		// dump states, count edges and collect sets while doing so
-		IntegerList nonGreedyStates = new IntegerList();
-		IntegerList precedenceStates = new IntegerList();
-		data.add(atn.states.size());
-		for (ATNState s : atn.states) {
-			if ( s==null ) { // might be optimized away
-				data.add(ATNState.INVALID_TYPE);
-				continue;
-			}
-
-			int stateType = s.getStateType();
-			if (s instanceof DecisionState && ((DecisionState)s).nonGreedy) {
-				nonGreedyStates.add(s.stateNumber);
-			}
+	}
 
-			if (s instanceof RuleStartState && ((RuleStartState)s).isLeftRecursiveRule) {
-				precedenceStates.add(s.stateNumber);
-			}
+	private void addLexerActions() {
+		if (atn.grammarType == ATNType.LEXER) {
+			data.add(atn.lexerActions.length);
+			for (LexerAction action : atn.lexerActions) {
+				data.add(action.getActionType().ordinal());
+				switch (action.getActionType()) {
+				case CHANNEL:
+					int channel = ((LexerChannelAction)action).getChannel();
+					data.add(channel);
+					data.add(0);
+					break;
 
-			data.add(stateType);
+				case CUSTOM:
+					int ruleIndex = ((LexerCustomAction)action).getRuleIndex();
+					int actionIndex = ((LexerCustomAction)action).getActionIndex();
+					data.add(ruleIndex);
+					data.add(actionIndex);
+					break;
 
-			if (s.ruleIndex == -1) {
-				data.add(Character.MAX_VALUE);
-			}
-			else {
-				data.add(s.ruleIndex);
-			}
+				case MODE:
+					int mode = ((LexerModeAction)action).getMode();
+					data.add(mode);
+					data.add(0);
+					break;
 
-			if ( s.getStateType() == ATNState.LOOP_END ) {
-				data.add(((LoopEndState)s).loopBackState.stateNumber);
-			}
-			else if ( s instanceof BlockStartState ) {
-				data.add(((BlockStartState)s).endState.stateNumber);
-			}
+				case MORE:
+					data.add(0);
+					data.add(0);
+					break;
 
-			if (s.getStateType() != ATNState.RULE_STOP) {
-				// the deserializer can trivially derive these edges, so there's no need to serialize them
-				nedges += s.getNumberOfTransitions();
-			}
+				case POP_MODE:
+					data.add(0);
+					data.add(0);
+					break;
 
-			for (int i=0; i<s.getNumberOfTransitions(); i++) {
-				Transition t = s.transition(i);
-				int edgeType = Transition.serializationTypes.get(t.getClass());
-				if ( edgeType == Transition.SET || edgeType == Transition.NOT_SET ) {
-					SetTransition st = (SetTransition)t;
-					sets.put(st.set, true);
-				}
-			}
-		}
+				case PUSH_MODE:
+					mode = ((LexerPushModeAction)action).getMode();
+					data.add(mode);
+					data.add(0);
+					break;
 
-		// non-greedy states
-		data.add(nonGreedyStates.size());
-		for (int i = 0; i < nonGreedyStates.size(); i++) {
-			data.add(nonGreedyStates.get(i));
-		}
+				case SKIP:
+					data.add(0);
+					data.add(0);
+					break;
 
-		// precedence states
-		data.add(precedenceStates.size());
-		for (int i = 0; i < precedenceStates.size(); i++) {
-			data.add(precedenceStates.get(i));
-		}
+				case TYPE:
+					int type = ((LexerTypeAction)action).getType();
+					data.add(type);
+					data.add(0);
+					break;
 
-		int nrules = atn.ruleToStartState.length;
-		data.add(nrules);
-		for (int r=0; r<nrules; r++) {
-			ATNState ruleStartState = atn.ruleToStartState[r];
-			data.add(ruleStartState.stateNumber);
-			if (atn.grammarType == ATNType.LEXER) {
-				if (atn.ruleToTokenType[r] == Token.EOF) {
-					data.add(Character.MAX_VALUE);
-				}
-				else {
-					data.add(atn.ruleToTokenType[r]);
+				default:
+					String message = String.format(Locale.getDefault(), "The specified lexer action type %s is not valid.", action.getActionType());
+					throw new IllegalArgumentException(message);
 				}
 			}
 		}
+	}
 
-		int nmodes = atn.modeToStartState.size();
-		data.add(nmodes);
-		if ( nmodes>0 ) {
-			for (ATNState modeStartState : atn.modeToStartState) {
-				data.add(modeStartState.stateNumber);
-			}
-		}
-		List<IntervalSet> bmpSets = new ArrayList<>();
-		List<IntervalSet> smpSets = new ArrayList<>();
-		for (IntervalSet set : sets.keySet()) {
-			if (!set.isNil() && set.getMaxElement() <= Character.MAX_VALUE) {
-				bmpSets.add(set);
-			}
-			else {
-				smpSets.add(set);
-			}
-		}
-		serializeSets(
-			data,
-			bmpSets,
-			new CodePointSerializer() {
-				@Override
-				public void serializeCodePoint(IntegerList data, int cp) {
-					data.add(cp);
-				}
-			});
-		serializeSets(
-			data,
-			smpSets,
-			new CodePointSerializer() {
-				@Override
-				public void serializeCodePoint(IntegerList data, int cp) {
-					serializeInt(data, cp);
-				}
-			});
-		Map<IntervalSet, Integer> setIndices = new HashMap<>();
-		int setIndex = 0;
-		for (IntervalSet bmpSet : bmpSets) {
-			setIndices.put(bmpSet, setIndex++);
-		}
-		for (IntervalSet smpSet : smpSets) {
-			setIndices.put(smpSet, setIndex++);
+	private void addDecisionStartStates() {
+		int ndecisions = atn.decisionToState.size();
+		data.add(ndecisions);
+		for (DecisionState decStartState : atn.decisionToState) {
+			data.add(decStartState.stateNumber);
 		}
+	}
 
+	private void addEdges(int nedges, Map<IntervalSet, Integer> setIndices) {
 		data.add(nedges);
 		for (ATNState s : atn.states) {
 			if ( s==null ) {
@@ -248,7 +205,6 @@ public void serializeCodePoint(IntegerList data, int cp) {
 							arg1 = 0;
 							arg3 = 1;
 						}
-
 						break;
 					case Transition.ATOM :
 						arg1 = ((AtomTransition)t).label;
@@ -256,16 +212,11 @@ public void serializeCodePoint(IntegerList data, int cp) {
 							arg1 = 0;
 							arg3 = 1;
 						}
-
 						break;
 					case Transition.ACTION :
 						ActionTransition at = (ActionTransition)t;
 						arg1 = at.ruleIndex;
 						arg2 = at.actionIndex;
-						if (arg2 == -1) {
-							arg2 = 0xFFFF;
-						}
-
 						arg3 = at.isCtxDependent ? 1 : 0 ;
 						break;
 					case Transition.SET :
@@ -286,93 +237,102 @@ public void serializeCodePoint(IntegerList data, int cp) {
 				data.add(arg3);
 			}
 		}
+	}
 
-		int ndecisions = atn.decisionToState.size();
-		data.add(ndecisions);
-		for (DecisionState decStartState : atn.decisionToState) {
-			data.add(decStartState.stateNumber);
+	private Map<IntervalSet, Integer> addSets() {
+		serializeSets(data,	sets.keySet());
+		Map<IntervalSet, Integer> setIndices = new HashMap<>();
+		int setIndex = 0;
+		for (IntervalSet s : sets.keySet()) {
+			setIndices.put(s, setIndex++);
 		}
+		return setIndices;
+	}
 
-		//
-		// LEXER ACTIONS
-		//
-		if (atn.grammarType == ATNType.LEXER) {
-			data.add(atn.lexerActions.length);
-			for (LexerAction action : atn.lexerActions) {
-				data.add(action.getActionType().ordinal());
-				switch (action.getActionType()) {
-				case CHANNEL:
-					int channel = ((LexerChannelAction)action).getChannel();
-					data.add(channel != -1 ? channel : 0xFFFF);
-					data.add(0);
-					break;
+	private void addModeStartStates() {
+		int nmodes = atn.modeToStartState.size();
+		data.add(nmodes);
+		if ( nmodes>0 ) {
+			for (ATNState modeStartState : atn.modeToStartState) {
+				data.add(modeStartState.stateNumber);
+			}
+		}
+	}
 
-				case CUSTOM:
-					int ruleIndex = ((LexerCustomAction)action).getRuleIndex();
-					int actionIndex = ((LexerCustomAction)action).getActionIndex();
-					data.add(ruleIndex != -1 ? ruleIndex : 0xFFFF);
-					data.add(actionIndex != -1 ? actionIndex : 0xFFFF);
-					break;
+	private void addRuleStatesAndLexerTokenTypes() {
+		int nrules = atn.ruleToStartState.length;
+		data.add(nrules);
+		for (int r=0; r<nrules; r++) {
+			ATNState ruleStartState = atn.ruleToStartState[r];
+			data.add(ruleStartState.stateNumber);
+			if (atn.grammarType == ATNType.LEXER) {
+				assert atn.ruleToTokenType[r]>=0; // 0 implies fragment rule, other token types > 0
+				data.add(atn.ruleToTokenType[r]);
+			}
+		}
+	}
 
-				case MODE:
-					int mode = ((LexerModeAction)action).getMode();
-					data.add(mode != -1 ? mode : 0xFFFF);
-					data.add(0);
-					break;
+	private void addPrecedenceStates() {
+		data.add(precedenceStates.size());
+		for (int i = 0; i < precedenceStates.size(); i++) {
+			data.add(precedenceStates.get(i));
+		}
+	}
 
-				case MORE:
-					data.add(0);
-					data.add(0);
-					break;
+	private void addNonGreedyStates() {
+		data.add(nonGreedyStates.size());
+		for (int i = 0; i < nonGreedyStates.size(); i++) {
+			data.add(nonGreedyStates.get(i));
+		}
+	}
 
-				case POP_MODE:
-					data.add(0);
-					data.add(0);
-					break;
+	private int addEdges() {
+		int nedges = 0;
+		data.add(atn.states.size());
+		for (ATNState s : atn.states) {
+			if ( s==null ) { // might be optimized away
+				data.add(ATNState.INVALID_TYPE);
+				continue;
+			}
 
-				case PUSH_MODE:
-					mode = ((LexerPushModeAction)action).getMode();
-					data.add(mode != -1 ? mode : 0xFFFF);
-					data.add(0);
-					break;
+			int stateType = s.getStateType();
+			if (s instanceof DecisionState && ((DecisionState)s).nonGreedy) {
+				nonGreedyStates.add(s.stateNumber);
+			}
 
-				case SKIP:
-					data.add(0);
-					data.add(0);
-					break;
+			if (s instanceof RuleStartState && ((RuleStartState)s).isLeftRecursiveRule) {
+				precedenceStates.add(s.stateNumber);
+			}
 
-				case TYPE:
-					int type = ((LexerTypeAction)action).getType();
-					data.add(type != -1 ? type : 0xFFFF);
-					data.add(0);
-					break;
+			data.add(stateType);
 
-				default:
-					String message = String.format(Locale.getDefault(), "The specified lexer action type %s is not valid.", action.getActionType());
-					throw new IllegalArgumentException(message);
-				}
+			data.add(s.ruleIndex);
+
+			if ( s.getStateType() == ATNState.LOOP_END ) {
+				data.add(((LoopEndState)s).loopBackState.stateNumber);
+			}
+			else if ( s instanceof BlockStartState ) {
+				data.add(((BlockStartState)s).endState.stateNumber);
 			}
-		}
 
-		boolean isJava = language.equals("Java");
-		for (int i = 1; i < data.size(); i++) {
-			int value = data.get(i);
-			if (value < Character.MIN_VALUE || value > Character.MAX_VALUE) {
-				throw new UnsupportedOperationException("Serialized ATN data element " +
-						value + " element " + i + " out of range " + (int) Character.MIN_VALUE + ".." + (int) Character.MAX_VALUE);
+			if (s.getStateType() != ATNState.RULE_STOP) {
+				// the deserializer can trivially derive these edges, so there's no need to serialize them
+				nedges += s.getNumberOfTransitions();
 			}
 
-			data.set(i, isJava ? (value + 2) & 0xFFFF : value);
+			for (int i=0; i<s.getNumberOfTransitions(); i++) {
+				Transition t = s.transition(i);
+				int edgeType = Transition.serializationTypes.get(t.getClass());
+				if ( edgeType == Transition.SET || edgeType == Transition.NOT_SET ) {
+					SetTransition st = (SetTransition)t;
+					sets.put(st.set, true);
+				}
+			}
 		}
-
-		return data;
+		return nedges;
 	}
 
-	private static void serializeSets(
-			IntegerList data,
-			Collection<IntervalSet> sets,
-			CodePointSerializer codePointSerializer)
-	{
+	private static void serializeSets(IntegerList data, Collection<IntervalSet> sets) {
 		int nSets = sets.size();
 		data.add(nSets);
 
@@ -392,209 +352,18 @@ private static void serializeSets(
 						continue;
 					}
 					else {
-						codePointSerializer.serializeCodePoint(data, 0);
+						data.add(0);
 					}
 				}
 				else {
-					codePointSerializer.serializeCodePoint(data, I.a);
+					data.add(I.a);
 				}
-
-				codePointSerializer.serializeCodePoint(data, I.b);
+				data.add(I.b);
 			}
 		}
 	}
 
-	public String decode(char[] data) {
-		data = data.clone();
-		// don't adjust the first value since that's the version number
-		for (int i = 1; i < data.length; i++) {
-			data[i] = (char)(data[i] - 2);
-		}
-
-		StringBuilder buf = new StringBuilder();
-		int p = 0;
-		int version = ATNDeserializer.toInt(data[p++]);
-		if (version != ATNDeserializer.SERIALIZED_VERSION) {
-			String reason = String.format("Could not deserialize ATN with version %d (expected %d).", version, ATNDeserializer.SERIALIZED_VERSION);
-			throw new UnsupportedOperationException(new InvalidClassException(ATN.class.getName(), reason));
-		}
-
-		p++; // skip grammarType
-		int maxType = ATNDeserializer.toInt(data[p++]);
-		buf.append("max type ").append(maxType).append("\n");
-		int nstates = ATNDeserializer.toInt(data[p++]);
-		for (int i=0; i<nstates; i++) {
-			int stype = ATNDeserializer.toInt(data[p++]);
-            if ( stype==ATNState.INVALID_TYPE ) continue; // ignore bad type of states
-			int ruleIndex = ATNDeserializer.toInt(data[p++]);
-			if (ruleIndex == Character.MAX_VALUE) {
-				ruleIndex = -1;
-			}
-
-			String arg = "";
-			if ( stype == ATNState.LOOP_END ) {
-				int loopBackStateNumber = ATNDeserializer.toInt(data[p++]);
-				arg = " "+loopBackStateNumber;
-			}
-			else if ( stype == ATNState.PLUS_BLOCK_START || stype == ATNState.STAR_BLOCK_START || stype == ATNState.BLOCK_START ) {
-				int endStateNumber = ATNDeserializer.toInt(data[p++]);
-				arg = " "+endStateNumber;
-			}
-			buf.append(i).append(":")
-				.append(ATNState.serializationNames.get(stype)).append(" ")
-				.append(ruleIndex).append(arg).append("\n");
-		}
-		// this code is meant to model the form of ATNDeserializer.deserialize,
-		// since both need to be updated together whenever a change is made to
-		// the serialization format. The "dead" code is only used in debugging
-		// and testing scenarios, so the form you see here was kept for
-		// improved maintainability.
-		// start
-		int numNonGreedyStates = ATNDeserializer.toInt(data[p++]);
-		for (int i = 0; i < numNonGreedyStates; i++) {
-			int stateNumber = ATNDeserializer.toInt(data[p++]);
-		}
-		int numPrecedenceStates = ATNDeserializer.toInt(data[p++]);
-		for (int i = 0; i < numPrecedenceStates; i++) {
-			int stateNumber = ATNDeserializer.toInt(data[p++]);
-		}
-		// finish
-		int nrules = ATNDeserializer.toInt(data[p++]);
-		for (int i=0; i<nrules; i++) {
-			int s = ATNDeserializer.toInt(data[p++]);
-            if (atn.grammarType == ATNType.LEXER) {
-                int arg1 = ATNDeserializer.toInt(data[p++]);
-                buf.append("rule ").append(i).append(":").append(s).append(" ").append(arg1).append('\n');
-            }
-            else {
-                buf.append("rule ").append(i).append(":").append(s).append('\n');
-            }
-		}
-		int nmodes = ATNDeserializer.toInt(data[p++]);
-		for (int i=0; i<nmodes; i++) {
-			int s = ATNDeserializer.toInt(data[p++]);
-			buf.append("mode ").append(i).append(":").append(s).append('\n');
-		}
-		int numBMPSets = ATNDeserializer.toInt(data[p++]);
-		p = appendSets(buf, data, p, numBMPSets, 0, ATNDeserializer.getUnicodeDeserializer(ATNDeserializer.UnicodeDeserializingMode.UNICODE_BMP));
-		int numSMPSets = ATNDeserializer.toInt(data[p++]);
-		p = appendSets(buf, data, p, numSMPSets, numBMPSets, ATNDeserializer.getUnicodeDeserializer(ATNDeserializer.UnicodeDeserializingMode.UNICODE_SMP));
-		int nedges = ATNDeserializer.toInt(data[p++]);
-		for (int i=0; i<nedges; i++) {
-			int src = ATNDeserializer.toInt(data[p]);
-			int trg = ATNDeserializer.toInt(data[p + 1]);
-			int ttype = ATNDeserializer.toInt(data[p + 2]);
-			int arg1 = ATNDeserializer.toInt(data[p + 3]);
-			int arg2 = ATNDeserializer.toInt(data[p + 4]);
-			int arg3 = ATNDeserializer.toInt(data[p + 5]);
-			buf.append(src).append("->").append(trg)
-				.append(" ").append(Transition.serializationNames.get(ttype))
-				.append(" ").append(arg1).append(",").append(arg2).append(",").append(arg3)
-				.append("\n");
-			p += 6;
-		}
-		int ndecisions = ATNDeserializer.toInt(data[p++]);
-		for (int i=0; i<ndecisions; i++) {
-			int s = ATNDeserializer.toInt(data[p++]);
-			buf.append(i).append(":").append(s).append("\n");
-		}
-		if (atn.grammarType == ATNType.LEXER) {
-			// this code is meant to model the form of ATNDeserializer.deserialize,
-			// since both need to be updated together whenever a change is made to
-			// the serialization format. The "dead" code is only used in debugging
-			// and testing scenarios, so the form you see here was kept for
-			// improved maintainability.
-			int lexerActionCount = ATNDeserializer.toInt(data[p++]);
-			for (int i = 0; i < lexerActionCount; i++) {
-				LexerActionType actionType = LexerActionType.values()[ATNDeserializer.toInt(data[p++])];
-				int data1 = ATNDeserializer.toInt(data[p++]);
-				int data2 = ATNDeserializer.toInt(data[p++]);
-			}
-		}
-		return buf.toString();
-	}
-
-	private int appendSets(StringBuilder buf, char[] data, int p, int nsets, int setIndexOffset, ATNDeserializer.UnicodeDeserializer unicodeDeserializer) {
-		for (int i=0; i<nsets; i++) {
-			int nintervals = ATNDeserializer.toInt(data[p++]);
-			buf.append(i+setIndexOffset).append(":");
-			boolean containsEof = data[p++] != 0;
-			if (containsEof) {
-				buf.append(getTokenName(Token.EOF));
-			}
-
-			for (int j=0; j<nintervals; j++) {
-				if ( containsEof || j>0 ) {
-					buf.append(", ");
-				}
-
-				int a = unicodeDeserializer.readUnicode(data, p);
-				p += unicodeDeserializer.size();
-				int b = unicodeDeserializer.readUnicode(data, p);
-				p += unicodeDeserializer.size();
-				buf.append(getTokenName(a)).append("..").append(getTokenName(b));
-			}
-			buf.append("\n");
-		}
-		return p;
-	}
-
-	public String getTokenName(int t) {
-		if ( t==-1 ) return "EOF";
-
-		if ( atn.grammarType == ATNType.LEXER &&
-			 t >= Character.MIN_VALUE && t <= Character.MAX_VALUE )
-		{
-			switch (t) {
-			case '\n':
-				return "'\\n'";
-			case '\r':
-				return "'\\r'";
-			case '\t':
-				return "'\\t'";
-			case '\b':
-				return "'\\b'";
-			case '\f':
-				return "'\\f'";
-			case '\\':
-				return "'\\\\'";
-			case '\'':
-				return "'\\''";
-			default:
-				if ( Character.UnicodeBlock.of((char)t)==Character.UnicodeBlock.BASIC_LATIN &&
-					 !Character.isISOControl((char)t) ) {
-					return '\''+Character.toString((char)t)+'\'';
-				}
-				// turn on the bit above max "\uFFFF" value so that we pad with zeros
-				// then only take last 4 digits
-				String hex = Integer.toHexString(t|0x10000).toUpperCase().substring(1,5);
-				String unicodeStr = "'\\u"+hex+"'";
-				return unicodeStr;
-			}
-		}
-
-		if (tokenNames != null && t >= 0 && t < tokenNames.size()) {
-			return tokenNames.get(t);
-		}
-
-		return String.valueOf(t);
-	}
-
-	/** Used by Java target to encode short/int array as chars in string. */
-	public static String getSerializedAsString(ATN atn, String language) {
-		return new String(getSerializedAsChars(atn, language));
-	}
-
-	public static IntegerList getSerialized(ATN atn, String language) {
-		return new ATNSerializer(atn).serialize(language);
-	}
-
-	public static char[] getSerializedAsChars(ATN atn, String language) {
-		return Utils.toCharArray(getSerialized(atn, language));
-	}
-
-	private void serializeInt(IntegerList data, int value) {
-		data.add((char)value);
-		data.add((char)(value >> 16));
+	public static IntegerList getSerialized(ATN atn) {
+		return new ATNSerializer(atn).serialize();
 	}
 }
diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java
index f889a1d188..b7670abc77 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java
@@ -85,66 +85,4 @@ public PredictionContext getCachedContext(PredictionContext context) {
 													  visited);
 		}
 	}
-
-	/**
-	 * @deprecated Use {@link ATNDeserializer#deserialize} instead.
-	 */
-	@Deprecated
-	public static ATN deserialize(char[] data) {
-		return new ATNDeserializer().deserialize(data);
-	}
-
-	/**
-	 * @deprecated Use {@link ATNDeserializer#checkCondition(boolean)} instead.
-	 */
-	@Deprecated
-	public static void checkCondition(boolean condition) {
-		new ATNDeserializer().checkCondition(condition);
-	}
-
-	/**
-	 * @deprecated Use {@link ATNDeserializer#checkCondition(boolean, String)} instead.
-	 */
-	@Deprecated
-	public static void checkCondition(boolean condition, String message) {
-		new ATNDeserializer().checkCondition(condition, message);
-	}
-
-	/**
-	 * @deprecated Use {@link ATNDeserializer#toInt} instead.
-	 */
-	@Deprecated
-	public static int toInt(char c) {
-		return ATNDeserializer.toInt(c);
-	}
-
-	/**
-	 * @deprecated Use {@link ATNDeserializer#toInt32} instead.
-	 */
-	@Deprecated
-	public static int toInt32(char[] data, int offset) {
-		return ATNDeserializer.toInt32(data, offset);
-	}
-
-	/**
-	 * @deprecated Use {@link ATNDeserializer#edgeFactory} instead.
-	 */
-	@Deprecated
-
-	public static Transition edgeFactory(ATN atn,
-										 int type, int src, int trg,
-										 int arg1, int arg2, int arg3,
-										 List<IntervalSet> sets)
-	{
-		return new ATNDeserializer().edgeFactory(atn, type, src, trg, arg1, arg2, arg3, sets);
-	}
-
-	/**
-	 * @deprecated Use {@link ATNDeserializer#stateFactory} instead.
-	 */
-	@Deprecated
-	public static ATNState stateFactory(int type, int ruleIndex) {
-		return new ATNDeserializer().stateFactory(type, ruleIndex);
-	}
-
 }
diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/IntegerList.java b/runtime/Java/src/org/antlr/v4/runtime/misc/IntegerList.java
index 090670d5a3..d6c700d8f6 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/misc/IntegerList.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/misc/IntegerList.java
@@ -276,8 +276,9 @@ private void ensureCapacity(int capacity) {
 		_data = Arrays.copyOf(_data, newLength);
 	}
 
-	/** Convert the list to a UTF-16 encoded char array. If all values are less
-	 *  than the 0xFFFF 16-bit code point limit then this is just a char array
+	/** Convert the int list to a char array where values > 0x7FFF take 2 chars. TODO: confirm
+	 *  this limit; the rest of this comment describes plain UTF-16. If all values are less
+	 *  than the 0x7FFF limit (1 bit of the 16 taken as an indicator) then this is just a char array
 	 *  of 16-bit char as usual. For values in the supplementary range, encode
 	 * them as two UTF-16 code units.
 	 */
diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/InterpreterDataReader.java b/runtime/Java/src/org/antlr/v4/runtime/misc/InterpreterDataReader.java
index 953bc6b757..94980e1532 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/misc/InterpreterDataReader.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/misc/InterpreterDataReader.java
@@ -20,7 +20,7 @@
 
 // A class to read plain text interpreter data produced by ANTLR.
 public class InterpreterDataReader {
-	
+
 	public static class InterpreterData {
 	  ATN atn;
 	  Vocabulary vocabulary;
@@ -28,39 +28,39 @@ public static class InterpreterData {
 	  List<String> channels; // Only valid for lexer grammars.
 	  List<String> modes; // ditto
 	};
-	
+
 	/**
 	 * The structure of the data file is very simple. Everything is line based with empty lines
 	 * separating the different parts. For lexers the layout is:
 	 * token literal names:
 	 * ...
-	 * 
+	 *
 	 * token symbolic names:
 	 * ...
-	 * 
+	 *
 	 * rule names:
 	 * ...
-	 * 
+	 *
 	 * channel names:
 	 * ...
-	 * 
+	 *
 	 * mode names:
 	 * ...
-	 * 
+	 *
 	 * atn:
 	 * <a single line with comma separated int values> enclosed in a pair of squared brackets.
-	 * 
+	 *
 	 * Data for a parser does not contain channel and mode names.
 	 */
 	public static InterpreterData parseFile(String fileName) {
 		InterpreterData result = new InterpreterData();
 		result.ruleNames = new ArrayList<String>();
-		
+
 		try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
 		    String line;
 		  	List<String> literalNames = new ArrayList<String>();
 		  	List<String> symbolicNames = new ArrayList<String>();
-		
+
 			line = br.readLine();
 			if ( !line.equals("token literal names:") )
 				throw new RuntimeException("Unexpected data entry");
@@ -69,7 +69,7 @@ public static InterpreterData parseFile(String fileName) {
 					break;
 				literalNames.add(line.equals("null") ? "" : line);
 		    }
-		
+
 			line = br.readLine();
 			if ( !line.equals("token symbolic names:") )
 				throw new RuntimeException("Unexpected data entry");
@@ -89,7 +89,7 @@ public static InterpreterData parseFile(String fileName) {
 					break;
 				result.ruleNames.add(line);
 		    }
-		    
+
 			line = br.readLine();
 			if ( line.equals("channel names:") ) { // Additional lexer data.
 				result.channels = new ArrayList<String>();
@@ -114,19 +114,11 @@ public static InterpreterData parseFile(String fileName) {
 		  	if ( !line.equals("atn:") )
 		  		throw new RuntimeException("Unexpected data entry");
 			line = br.readLine();
-			String[] elements = line.split(",");
-	  		char[] serializedATN = new char[elements.length];
-
-			for (int i = 0; i < elements.length; ++i) {
-				int value;
-				String element = elements[i];
-				if ( element.startsWith("[") )
-					value = Integer.parseInt(element.substring(1).trim());
-				else if ( element.endsWith("]") )
-					value = Integer.parseInt(element.substring(0, element.length() - 1).trim());
-				else
-					value = Integer.parseInt(element.trim());
-				serializedATN[i] = (char)value;					
+			String[] elements = line.substring(1,line.length()-1).split(",");
+	  		int[] serializedATN = new int[elements.length];
+
+			for (int i = 0; i < elements.length; ++i) { // ignore [...] on ends
+				serializedATN[i] = Integer.parseInt(elements[i].trim());
 			}
 
 		  	ATNDeserializer deserializer = new ATNDeserializer();
@@ -135,8 +127,8 @@ else if ( element.endsWith("]") )
 		catch (java.io.IOException e) {
 			// We just swallow the error and return empty objects instead.
 		}
-		
+
 		return result;
 	}
-	
+
 }
diff --git a/runtime/JavaScript/src/antlr4/atn/ATNDeserializer.js b/runtime/JavaScript/src/antlr4/atn/ATNDeserializer.js
index 35897e650d..a6f8144dcd 100644
--- a/runtime/JavaScript/src/antlr4/atn/ATNDeserializer.js
+++ b/runtime/JavaScript/src/antlr4/atn/ATNDeserializer.js
@@ -82,10 +82,7 @@ class ATNDeserializer {
         this.readRules(atn);
         this.readModes(atn);
         const sets = [];
-        // First, deserialize sets with 16-bit arguments <= U+FFFF.
-        this.readSets(atn, sets, this.readInt.bind(this));
-        // Next, deserialize sets with 32-bit arguments <= U+10FFFF.
-        this.readSets(atn, sets, this.readInt32.bind(this));
+        this.readSets(atn, sets);
         this.readEdges(atn, sets);
         this.readDecisions(atn);
         this.readLexerActions(atn);
@@ -125,9 +122,6 @@ class ATNDeserializer {
                 continue;
             }
             let ruleIndex = this.readInt();
-            if (ruleIndex === 0xFFFF) {
-                ruleIndex = -1;
-            }
             const  s = this.stateFactory(stype, ruleIndex);
             if (stype === ATNState.LOOP_END) { // special case
                 const  loopBackStateNumber = this.readInt();
@@ -175,9 +169,6 @@ class ATNDeserializer {
             atn.ruleToStartState[i] = atn.states[s];
             if ( atn.grammarType === ATNType.LEXER ) {
                 let tokenType = this.readInt();
-                if (tokenType === 0xFFFF) {
-                    tokenType = Token.EOF;
-                }
                 atn.ruleToTokenType[i] = tokenType;
             }
         }
@@ -200,7 +191,7 @@ class ATNDeserializer {
         }
     }
 
-    readSets(atn, sets, readUnicode) {
+    readSets(atn, sets) {
         const m = this.readInt();
         for (let i=0; i<m; i++) {
             const iset = new IntervalSet();
@@ -211,8 +202,8 @@ class ATNDeserializer {
                 iset.addOne(-1);
             }
             for (let j=0; j<n; j++) {
-                const i1 = readUnicode();
-                const i2 = readUnicode();
+                const i1 = this.readInt();
+                const i2 = this.readInt();
                 iset.addRange(i1, i2);
             }
         }
@@ -301,14 +292,7 @@ class ATNDeserializer {
             for (let i=0; i<count; i++) {
                 const actionType = this.readInt();
                 let data1 = this.readInt();
-                if (data1 === 0xFFFF) {
-                    data1 = -1;
-                }
                 let data2 = this.readInt();
-                if (data2 === 0xFFFF) {
-                    data2 = -1;
-                }
-
                 atn.lexerActions[i] = this.lexerActionFactory(actionType, data1, data2);
             }
         }
@@ -497,12 +481,6 @@ class ATNDeserializer {
         return this.data[this.pos++];
     }
 
-    readInt32() {
-        const low = this.readInt();
-        const high = this.readInt();
-        return low | (high << 16);
-    }
-
     edgeFactory(atn, type, src, trg, arg1, arg2, arg3, sets) {
         const target = atn.states[trg];
         switch(type) {
diff --git a/runtime/Python2/src/antlr4/atn/ATNDeserializer.py b/runtime/Python2/src/antlr4/atn/ATNDeserializer.py
index 6bda32ce5e..f8bc5133ca 100644
--- a/runtime/Python2/src/antlr4/atn/ATNDeserializer.py
+++ b/runtime/Python2/src/antlr4/atn/ATNDeserializer.py
@@ -30,10 +30,7 @@ def deserialize(self, data):
         self.readRules(atn)
         self.readModes(atn)
         sets = []
-        # First, read all sets with 16-bit Unicode code points <= U+FFFF.
-        self.readSets(atn, sets, self.readInt)
-        # Next, deserialize sets with 32-bit arguments <= U+10FFFF.
-        self.readSets(atn, sets, self.readInt32)
+        self.readSets(atn, sets)
         self.readEdges(atn, sets)
         self.readDecisions(atn)
         self.readLexerActions(atn)
@@ -67,9 +64,6 @@ def readStates(self, atn):
                 atn.addState(None)
                 continue
             ruleIndex = self.readInt()
-            if ruleIndex == 0xFFFF:
-                ruleIndex = -1
-
             s = self.stateFactory(stype, ruleIndex)
             if stype == ATNState.LOOP_END: # special case
                 loopBackStateNumber = self.readInt()
@@ -109,9 +103,6 @@ def readRules(self, atn):
             atn.ruleToStartState[i] = startState
             if atn.grammarType == ATNType.LEXER:
                 tokenType = self.readInt()
-                if tokenType == 0xFFFF:
-                    tokenType = Token.EOF
-
                 atn.ruleToTokenType[i] = tokenType
 
         atn.ruleToStopState = [0] * nrules
@@ -127,7 +118,7 @@ def readModes(self, atn):
             s = self.readInt()
             atn.modeToStartState.append(atn.states[s])
 
-    def readSets(self, atn, sets, readUnicode):
+    def readSets(self, atn, sets):
         m = self.readInt()
         for i in range(0, m):
             iset = IntervalSet()
@@ -137,8 +128,8 @@ def readSets(self, atn, sets, readUnicode):
             if containsEof!=0:
                 iset.addOne(-1)
             for j in range(0, n):
-                i1 = readUnicode()
-                i2 = readUnicode()
+                i1 = self.readInt()
+                i2 = self.readInt()
                 iset.addRange(Interval(i1, i2 + 1)) # range upper limit is exclusive
 
     def readEdges(self, atn, sets):
@@ -203,11 +194,7 @@ def readLexerActions(self, atn):
             for i in range(0, count):
                 actionType = self.readInt()
                 data1 = self.readInt()
-                if data1 == 0xFFFF:
-                    data1 = -1
                 data2 = self.readInt()
-                if data2 == 0xFFFF:
-                    data2 = -1
                 lexerAction = self.lexerActionFactory(actionType, data1, data2)
                 atn.lexerActions[i] = lexerAction
 
diff --git a/runtime/Python3/src/antlr4/atn/ATNDeserializer.py b/runtime/Python3/src/antlr4/atn/ATNDeserializer.py
index ddeb18af58..e0495dd570 100644
--- a/runtime/Python3/src/antlr4/atn/ATNDeserializer.py
+++ b/runtime/Python3/src/antlr4/atn/ATNDeserializer.py
@@ -31,10 +31,7 @@ def deserialize(self, data : int):
         self.readRules(atn)
         self.readModes(atn)
         sets = []
-        # First, read all sets with 16-bit Unicode code points <= U+FFFF.
-        self.readSets(atn, sets, self.readInt)
-        # Next, deserialize sets with 32-bit arguments <= U+10FFFF.
-        self.readSets(atn, sets, self.readInt32)
+        self.readSets(atn, sets)
         self.readEdges(atn, sets)
         self.readDecisions(atn)
         self.readLexerActions(atn)
@@ -69,9 +66,6 @@ def readStates(self, atn:ATN):
                 atn.addState(None)
                 continue
             ruleIndex = self.readInt()
-            if ruleIndex == 0xFFFF:
-                ruleIndex = -1
-
             s = self.stateFactory(stype, ruleIndex)
             if stype == ATNState.LOOP_END: # special case
                 loopBackStateNumber = self.readInt()
@@ -111,9 +105,6 @@ def readRules(self, atn:ATN):
             atn.ruleToStartState[i] = startState
             if atn.grammarType == ATNType.LEXER:
                 tokenType = self.readInt()
-                if tokenType == 0xFFFF:
-                    tokenType = Token.EOF
-
                 atn.ruleToTokenType[i] = tokenType
 
         atn.ruleToStopState = [0] * nrules
@@ -129,7 +120,7 @@ def readModes(self, atn:ATN):
             s = self.readInt()
             atn.modeToStartState.append(atn.states[s])
 
-    def readSets(self, atn:ATN, sets:list, readUnicode:Callable[[], int]):
+    def readSets(self, atn:ATN, sets:list):
         m = self.readInt()
         for i in range(0, m):
             iset = IntervalSet()
@@ -139,8 +130,8 @@ def readSets(self, atn:ATN, sets:list, readUnicode:Callable[[], int]):
             if containsEof!=0:
                 iset.addOne(-1)
             for j in range(0, n):
-                i1 = readUnicode()
-                i2 = readUnicode()
+                i1 = self.readInt()
+                i2 = self.readInt()
                 iset.addRange(range(i1, i2 + 1)) # range upper limit is exclusive
 
     def readEdges(self, atn:ATN, sets:list):
@@ -205,11 +196,7 @@ def readLexerActions(self, atn:ATN):
             for i in range(0, count):
                 actionType = self.readInt()
                 data1 = self.readInt()
-                if data1 == 0xFFFF:
-                    data1 = -1
                 data2 = self.readInt()
-                if data2 == 0xFFFF:
-                    data2 = -1
                 lexerAction = self.lexerActionFactory(actionType, data1, data2)
                 atn.lexerActions[i] = lexerAction
 
@@ -379,11 +366,6 @@ def readInt(self):
         self.pos += 1
         return i
 
-    def readInt32(self):
-        low = self.readInt()
-        high = self.readInt()
-        return low | (high << 16)
-
     edgeFactories = [ lambda args : None,
                       lambda atn, src, trg, arg1, arg2, arg3, sets, target : EpsilonTransition(target),
                       lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
diff --git a/runtime/Swift/Sources/Antlr4/atn/ATNDeserializer.swift b/runtime/Swift/Sources/Antlr4/atn/ATNDeserializer.swift
index 96b176399f..8677ebf6fb 100644
--- a/runtime/Swift/Sources/Antlr4/atn/ATNDeserializer.swift
+++ b/runtime/Swift/Sources/Antlr4/atn/ATNDeserializer.swift
@@ -16,7 +16,6 @@ public class ATNDeserializer {
     }
 
     public func deserialize(_ data: [Int]) throws -> ATN {
-//         let data = str.utf16.map { element in Int(element) }
         var p = 0
 
         let version = data[p]
@@ -50,10 +49,6 @@ public class ATNDeserializer {
 
             var ruleIndex = data[p]
             p += 1
-            if ruleIndex == UInt16.max {
-                ruleIndex = -1
-            }
-
             let s = try stateFactory(stype, ruleIndex)!
             if stype == ATNState.LOOP_END {
                 // special case
@@ -109,10 +104,6 @@ public class ATNDeserializer {
             if atn.grammarType == ATNType.lexer {
                 var tokenType = data[p]
                 p += 1
-                if tokenType == UInt16.max {
-                    tokenType = CommonToken.EOF
-                }
-
                 ruleToTokenType.append(tokenType)
             }
         }
@@ -139,11 +130,7 @@ public class ATNDeserializer {
         //
         var sets = [IntervalSet]()
 
-        // First, deserialize sets with 16-bit arguments <= U+FFFF.
-        readSets(data, &p, &sets, readUnicodeInt)
-
-        // Next, deserialize sets with 32-bit arguments <= U+10FFFF.
-        readSets(data, &p, &sets, readUnicodeInt32)
+        readSets(data, &p, &sets, readInt)
 
         //
         // EDGES
@@ -194,16 +181,8 @@ public class ATNDeserializer {
                 p += 1
                 var data1 = data[p]
                 p += 1
-                if data1 == UInt16.max {
-                    data1 = -1
-                }
-
                 var data2 = data[p]
                 p += 1
-                if data2 == UInt16.max {
-                    data2 = -1
-                }
-
                 let lexerAction = lexerActionFactory(actionType, data1, data2)
                 lexerActions.append(lexerAction)
             }
@@ -214,18 +193,12 @@ public class ATNDeserializer {
         return atn
     }
 
-    private func readUnicodeInt(_ data: [Int], _ p: inout Int) -> Int {
+    private func readInt(_ data: [Int], _ p: inout Int) -> Int {
         let result = data[p]
         p += 1
         return result
     }
 
-    private func readUnicodeInt32(_ data: [Int], _ p: inout Int) -> Int {
-        let result = toInt32(data[p..<p+2].map{Character(UnicodeScalar($0)!)}, 0)
-        p += 2
-        return result
-    }
-
     private func readSets(_ data: [Int], _ p: inout Int, _ sets: inout [IntervalSet], _ readUnicode: ([Int], inout Int) -> Int) {
         let nsets = data[p]
         p += 1
diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/ATNDescriber.java b/tool-testsuite/test/org/antlr/v4/test/tool/ATNDescriber.java
new file mode 100644
index 0000000000..dbc1a6bad2
--- /dev/null
+++ b/tool-testsuite/test/org/antlr/v4/test/tool/ATNDescriber.java
@@ -0,0 +1,203 @@
+package org.antlr.v4.test.tool;
+
+import org.antlr.v4.runtime.Token;
+import org.antlr.v4.runtime.atn.*;
+
+import java.io.InvalidClassException;
+import java.util.List;
+
+/** Turn the ints of a serialized ATN into human readable text like this (for debugging / testing):
+ *
+ * max type 1
+ * 0:TOKEN_START -1
+ * 1:RULE_START 0
+ * 2:RULE_STOP 0
+ * 3:BASIC 0
+ * 4:BASIC 0
+ * rule 0:1 1
+ * mode 0:0
+ * 0:'a'..128169
+ * 0->1 EPSILON 0,0,0
+ * 1->3 EPSILON 0,0,0
+ * 3->4 SET 0,0,0
+ * 4->2 EPSILON 0,0,0
+ * 0:0
+ */
+public class ATNDescriber {
+	public ATN atn;
+	private List<String> tokenNames;
+
+	public ATNDescriber(ATN atn, List<String> tokenNames) {
+		assert atn.grammarType != null; // decode() and getTokenName() branch on the grammar type
+		this.tokenNames = tokenNames; // may be null: getTokenName() then skips the name lookup
+		this.atn = atn;
+	}
+
+	/** For testing really; walks the serialized ints in {@code data} and returns a line-oriented, human readable dump of the ATN. Throws UnsupportedOperationException on a version mismatch. */
+	public String decode(int[] data) {
+		StringBuilder buf = new StringBuilder();
+		int p = 0;
+		int version = data[p++];
+		if (version != ATNDeserializer.SERIALIZED_VERSION) {
+			String reason = String.format("Could not deserialize ATN with version %d (expected %d).", version, ATNDeserializer.SERIALIZED_VERSION);
+			throw new UnsupportedOperationException(new InvalidClassException(ATN.class.getName(), reason));
+		}
+
+		p++; // skip grammarType
+		int maxType = data[p++];
+		buf.append("max type ").append(maxType).append("\n");
+		int nstates = data[p++];
+		for (int i=0; i<nstates; i++) {
+			int stype = data[p++];
+			if ( stype== ATNState.INVALID_TYPE ) continue; // ignore bad type of states; nothing more is read for them
+			int ruleIndex = data[p++];
+			if (ruleIndex == Character.MAX_VALUE) { // NOTE(review): looks like a leftover from a 16-bit encoding of -1 -- confirm it is still needed
+				ruleIndex = -1;
+			}
+
+			String arg = ""; // extra state number printed after the rule index, if the state type carries one
+			if ( stype == ATNState.LOOP_END ) {
+				int loopBackStateNumber = data[p++];
+				arg = " "+loopBackStateNumber;
+			}
+			else if ( stype == ATNState.PLUS_BLOCK_START || stype == ATNState.STAR_BLOCK_START || stype == ATNState.BLOCK_START ) {
+				int endStateNumber = data[p++];
+				arg = " "+endStateNumber;
+			}
+			buf.append(i).append(":")
+					.append(ATNState.serializationNames.get(stype)).append(" ")
+					.append(ruleIndex).append(arg).append("\n");
+		}
+		// this code is meant to model the form of ATNDeserializer.deserialize,
+		// since both need to be updated together whenever a change is made to
+		// the serialization format. The "dead" code is only used in debugging
+		// and testing scenarios, so the form you see here was kept for
+		// improved maintainability.
+		// start
+		int numNonGreedyStates = data[p++];
+		for (int i = 0; i < numNonGreedyStates; i++) {
+			int stateNumber = data[p++]; // read only to advance p; not printed
+		}
+		int numPrecedenceStates = data[p++];
+		for (int i = 0; i < numPrecedenceStates; i++) {
+			int stateNumber = data[p++]; // read only to advance p; not printed
+		}
+		// finish
+		int nrules = data[p++];
+		for (int i=0; i<nrules; i++) {
+			int s = data[p++];
+			if (atn.grammarType == ATNType.LEXER) {
+				int arg1 = data[p++]; // lexer rules carry one extra int, printed after the first
+				buf.append("rule ").append(i).append(":").append(s).append(" ").append(arg1).append('\n');
+			}
+			else {
+				buf.append("rule ").append(i).append(":").append(s).append('\n');
+			}
+		}
+		int nmodes = data[p++];
+		for (int i=0; i<nmodes; i++) {
+			int s = data[p++];
+			buf.append("mode ").append(i).append(":").append(s).append('\n');
+		}
+		int numBMPSets = data[p++]; // NOTE(review): name looks stale -- presumably this counts every set, not only BMP ones; confirm
+		p = appendSets(buf, data, p, numBMPSets);
+		int nedges = data[p++];
+		for (int i=0; i<nedges; i++) {
+			int src = data[p];
+			int trg = data[p + 1];
+			int ttype = data[p + 2];
+			int arg1 = data[p + 3];
+			int arg2 = data[p + 4];
+			int arg3 = data[p + 5];
+			buf.append(src).append("->").append(trg)
+					.append(" ").append(Transition.serializationNames.get(ttype))
+					.append(" ").append(arg1).append(",").append(arg2).append(",").append(arg3)
+					.append("\n");
+			p += 6; // each edge occupies the six ints read above
+		}
+		int ndecisions = data[p++];
+		for (int i=0; i<ndecisions; i++) {
+			int s = data[p++];
+			buf.append(i).append(":").append(s).append("\n");
+		}
+		if (atn.grammarType == ATNType.LEXER) {
+			// this code is meant to model the form of ATNDeserializer.deserialize,
+			// since both need to be updated together whenever a change is made to
+			// the serialization format. The "dead" code is only used in debugging
+			// and testing scenarios, so the form you see here was kept for
+			// improved maintainability.
+			int lexerActionCount = data[p++];
+			for (int i = 0; i < lexerActionCount; i++) {
+				LexerActionType actionType = LexerActionType.values()[data[p++]];
+				int data1 = data[p++]; // read only to advance p; lexer actions are not printed
+				int data2 = data[p++]; // read only to advance p; lexer actions are not printed
+			}
+		}
+		return buf.toString();
+	}
+
+	/** Appends one "index:lo..hi, lo..hi" line per set read from {@code data} at {@code p}; returns the index just past the last set. */
+	private int appendSets(StringBuilder buf, int[] data, int p, int nsets) {
+		for (int setIndex = 0; setIndex < nsets; setIndex++) {
+			final int intervalCount = data[p++];
+			buf.append(setIndex).append(":");
+			final boolean hasEof = data[p++] != 0;
+			String separator = "";
+			if (hasEof) {
+				buf.append(getTokenName(Token.EOF));
+				separator = ", ";
+			}
+			for (int k = 0; k < intervalCount; k++) {
+				buf.append(separator);
+				separator = ", ";
+				final int lo = data[p++];
+				final int hi = data[p++];
+				buf.append(getTokenName(lo)).append("..").append(getTokenName(hi));
+			}
+			buf.append("\n");
+		}
+		return p;
+	}
+
+	/**
+	 * Printable name for the serialized symbol {@code t}. -1 prints as "EOF". For a
+	 * lexer ATN, a value in the char range prints as a quoted literal: a backslash
+	 * escape for \n \r \t \b \f \\ and ', the char itself when it is a non-control
+	 * BASIC_LATIN char, otherwise a four-hex-digit backslash-u escape. Any other
+	 * value is looked up in {@code tokenNames} when that list has an entry at index
+	 * {@code t}; failing that, the decimal number is returned.
+	 */
+	public String getTokenName(int t) {
+		if (t == -1) {
+			return "EOF";
+		}
+
+		final boolean fitsInChar = Character.MIN_VALUE <= t && t <= Character.MAX_VALUE;
+		if (atn.grammarType == ATNType.LEXER && fitsInChar) {
+			final char c = (char) t;
+			if (c == '\n') return "'\\n'";
+			if (c == '\r') return "'\\r'";
+			if (c == '\t') return "'\\t'";
+			if (c == '\b') return "'\\b'";
+			if (c == '\f') return "'\\f'";
+			if (c == '\\') return "'\\\\'";
+			if (c == '\'') return "'\\''";
+			final boolean plainAscii = !Character.isISOControl(c)
+					&& Character.UnicodeBlock.of(c) == Character.UnicodeBlock.BASIC_LATIN;
+			if (plainAscii) {
+				return "'" + c + "'";
+			}
+			// OR-ing in 0x10000 always yields five hex digits ("1XXXX"); dropping the
+			// leading 1 leaves the value zero-padded to exactly four digits
+			final String fiveDigits = Integer.toHexString(t | 0x10000).toUpperCase();
+			return "'\\u" + fiveDigits.substring(1, 5) + "'";
+		}
+
+		final boolean hasName = tokenNames != null && 0 <= t && t < tokenNames.size();
+		if (hasName) {
+			return tokenNames.get(t);
+		}
+		return String.valueOf(t);
+	}
+
+}
diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNDeserialization.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNDeserialization.java
index 01301672fd..25d8127ad0 100644
--- a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNDeserialization.java
+++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNDeserialization.java
@@ -9,7 +9,7 @@
 import org.antlr.v4.runtime.atn.ATN;
 import org.antlr.v4.runtime.atn.ATNDeserializer;
 import org.antlr.v4.runtime.atn.ATNSerializer;
-import org.antlr.v4.runtime.misc.Utils;
+import org.antlr.v4.runtime.misc.IntegerList;
 import org.antlr.v4.tool.Grammar;
 import org.antlr.v4.tool.LexerGrammar;
 import org.junit.Before;
@@ -17,6 +17,9 @@
 
 import java.util.Arrays;
 
+import static org.antlr.v4.runtime.atn.ATNDeserializer.encodeIntsWith16BitWords;
+import static org.antlr.v4.runtime.atn.ATNDeserializer.decodeIntsEncodedAs16BitWords;
+import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 
 public class TestATNDeserialization extends BaseJavaToolTest {
@@ -151,21 +154,40 @@ public void testSetUp() throws Exception {
 
 	@Test public void test2ModesInLexer() throws Exception {
 		LexerGrammar lg = new LexerGrammar(
-			"lexer grammar L;\n"+
-			"A : 'a'\n ;\n" +
-			"mode M;\n" +
-			"B : 'b';\n" +
-			"mode M2;\n" +
-			"C : 'c';\n");
+				"lexer grammar L;\n"+
+						"A : 'a'\n ;\n" +
+						"mode M;\n" +
+						"B : 'b';\n" +
+						"mode M2;\n" +
+						"C : 'c';\n");
+		checkDeserializationIsStable(lg);
+	}
+
+	@Test public void testLastValidBMPCharInSet() throws Exception {
+		LexerGrammar lg = new LexerGrammar(
+				"lexer grammar L;\n" +
+						"ID : 'Ā'..'\\uFFFC'; // FFFD+ are not valid char\n");
 		checkDeserializationIsStable(lg);
 	}
 
 	protected void checkDeserializationIsStable(Grammar g) {
 		ATN atn = createATN(g, false);
-		char[] data = Utils.toCharArray(ATNSerializer.getSerialized(atn, "Java"));
-		String atnData = TestATNSerialization.getDecoded(atn, Arrays.asList(g.getTokenNames()));
-		ATN atn2 = new ATNDeserializer().deserialize(data);
-		String atn2Data = TestATNSerialization.getDecoded(atn2, Arrays.asList(g.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String atnData = new ATNDescriber(atn, Arrays.asList(g.getTokenNames())).decode(serialized.toArray());
+
+		IntegerList serialized16 = encodeIntsWith16BitWords(serialized);
+		int[] ints16 = serialized16.toArray();
+		char[] chars = new char[ints16.length];
+		for (int i = 0; i < ints16.length; i++) {
+			chars[i] = (char)ints16[i];
+		}
+		int[] serialized32 = decodeIntsEncodedAs16BitWords(chars, true);
+
+		assertArrayEquals(serialized.toArray(), serialized32);
+
+		ATN atn2 = new ATNDeserializer().deserialize(serialized.toArray());
+		IntegerList serialized1 = ATNSerializer.getSerialized(atn2);
+		String atn2Data = new ATNDescriber(atn2, Arrays.asList(g.getTokenNames())).decode(serialized1.toArray());
 
 		assertEquals(atnData, atn2Data);
 	}
diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java
index e962e6131e..7f993cf975 100644
--- a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java
+++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java
@@ -9,7 +9,6 @@
 import org.antlr.v4.runtime.atn.ATN;
 import org.antlr.v4.runtime.atn.ATNSerializer;
 import org.antlr.v4.runtime.misc.IntegerList;
-import org.antlr.v4.runtime.misc.Utils;
 import org.antlr.v4.tool.DOTGenerator;
 import org.antlr.v4.tool.Grammar;
 import org.antlr.v4.tool.LexerGrammar;
@@ -17,8 +16,10 @@
 import org.junit.Test;
 
 import java.util.Arrays;
-import java.util.List;
 
+import static org.antlr.v4.runtime.atn.ATNDeserializer.encodeIntsWith16BitWords;
+import static org.antlr.v4.runtime.atn.ATNDeserializer.decodeIntsEncodedAs16BitWords;
+import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 
 public class TestATNSerialization extends BaseJavaToolTest {
@@ -45,31 +46,27 @@ public void testSetUp() throws Exception {
 				"2->3 ATOM 1,0,0\n" +
 				"3->4 ATOM 2,0,0\n" +
 				"4->1 EPSILON 0,0,0\n";
-		ATN atn = createATN(g, true);
-		String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
-		assertEquals(expecting, result);
+		checkResults(g, expecting);
 	}
 
 	@Test public void testEOF() throws Exception {
 		Grammar g = new Grammar(
-			"parser grammar T;\n"+
-			"a : A EOF ;");
+				"parser grammar T;\n"+
+						"a : A EOF ;");
 		String expecting =
-			"max type 1\n" +
-				"0:RULE_START 0\n" +
-				"1:RULE_STOP 0\n" +
-				"2:BASIC 0\n" +
-				"3:BASIC 0\n" +
-				"4:BASIC 0\n" +
-				"5:BASIC 0\n" +
-				"rule 0:0\n" +
-				"0->2 EPSILON 0,0,0\n" +
-				"2->3 ATOM 1,0,0\n" +
-				"3->4 ATOM 0,0,1\n" +
-				"4->1 EPSILON 0,0,0\n";
-		ATN atn = createATN(g, true);
-		String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
-		assertEquals(expecting, result);
+				"max type 1\n" +
+						"0:RULE_START 0\n" +
+						"1:RULE_STOP 0\n" +
+						"2:BASIC 0\n" +
+						"3:BASIC 0\n" +
+						"4:BASIC 0\n" +
+						"5:BASIC 0\n" +
+						"rule 0:0\n" +
+						"0->2 EPSILON 0,0,0\n" +
+						"2->3 ATOM 1,0,0\n" +
+						"3->4 ATOM 0,0,1\n" +
+						"4->1 EPSILON 0,0,0\n";
+		checkResults(g, expecting);
 	}
 
 	@Test public void testEOFInSet() throws Exception {
@@ -88,9 +85,7 @@ public void testSetUp() throws Exception {
 				"0->2 EPSILON 0,0,0\n" +
 				"2->3 SET 0,0,0\n" +
 				"3->1 EPSILON 0,0,0\n";
-		ATN atn = createATN(g, true);
-		String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
-		assertEquals(expecting, result);
+		checkResults(g, expecting);
 	}
 
 	@Test public void testNot() throws Exception {
@@ -111,8 +106,8 @@ public void testSetUp() throws Exception {
 			"2->3 NOT_SET 0,0,0\n" +
 			"3->1 EPSILON 0,0,0\n";
 		ATN atn = createATN(g, true);
-		DOTGenerator gen = new DOTGenerator(g);
-		String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(g.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -132,9 +127,7 @@ public void testSetUp() throws Exception {
 			"0->2 EPSILON 0,0,0\n" +
 			"2->3 WILDCARD 0,0,0\n" +
 			"3->1 EPSILON 0,0,0\n";
-		ATN atn = createATN(g, true);
-		String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
-		assertEquals(expecting, result);
+		checkResults(g, expecting);
 	}
 
 	@Test public void testPEGAchillesHeel() throws Exception {
@@ -160,9 +153,7 @@ public void testSetUp() throws Exception {
 				"5->3 EPSILON 0,0,0\n" +
 				"6->1 EPSILON 0,0,0\n" +
 				"0:5\n";
-		ATN atn = createATN(g, true);
-		String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
-		assertEquals(expecting, result);
+		checkResults(g, expecting);
 	}
 
 	@Test public void test3Alts() throws Exception {
@@ -195,9 +186,7 @@ public void testSetUp() throws Exception {
 				"8->5 EPSILON 0,0,0\n" +
 				"9->1 EPSILON 0,0,0\n" +
 				"0:8\n";
-		ATN atn = createATN(g, true);
-		String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
-		assertEquals(expecting, result);
+		checkResults(g, expecting);
 	}
 
 	@Test public void testSimpleLoop() throws Exception {
@@ -227,9 +216,7 @@ public void testSetUp() throws Exception {
 				"7->8 ATOM 2,0,0\n" +
 				"8->1 EPSILON 0,0,0\n" +
 				"0:5\n";
-		ATN atn = createATN(g, true);
-		String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
-		assertEquals(expecting, result);
+		checkResults(g, expecting);
 	}
 
 	@Test public void testRuleRef() throws Exception {
@@ -256,9 +243,7 @@ public void testSetUp() throws Exception {
 				"5->1 EPSILON 0,0,0\n" +
 				"6->7 ATOM 1,0,0\n" +
 				"7->3 EPSILON 0,0,0\n";
-		ATN atn = createATN(g, true);
-		String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
-		assertEquals(expecting, result);
+		checkResults(g, expecting);
 	}
 
 	@Test public void testLexerTwoRules() throws Exception {
@@ -290,7 +275,8 @@ public void testSetUp() throws Exception {
 			"8->4 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -314,7 +300,8 @@ public void testSetUp() throws Exception {
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -338,42 +325,88 @@ public void testSetUp() throws Exception {
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
-	@Test public void testLexerUnicodeSMPSetSerializedAfterBMPSet() throws Exception {
+	@Test public void testLexerUnicodeSMPAndBMPSetSerialized() throws Exception {
 		LexerGrammar lg = new LexerGrammar(
-			"lexer grammar L;\n"+
-			"SMP : ('\\u{1F4A9}' | '\\u{1F4AF}') ;\n"+
-			"BMP : ('a' | 'x') ;");
+				"lexer grammar L;\n"+
+						"SMP : ('\\u{1F4A9}' | '\\u{1F4AF}') ;\n"+
+						"BMP : ('a' | 'x') ;");
 		String expecting =
-			"max type 2\n" +
-			"0:TOKEN_START -1\n" +
-			"1:RULE_START 0\n" +
-			"2:RULE_STOP 0\n" +
-			"3:RULE_START 1\n" +
-			"4:RULE_STOP 1\n" +
-			"5:BASIC 0\n" +
-			"6:BASIC 0\n" +
-			"7:BASIC 1\n" +
-			"8:BASIC 1\n" +
-			"rule 0:1 1\n" +
-			"rule 1:3 2\n" +
-			"mode 0:0\n" +
-			"0:'a'..'a', 'x'..'x'\n" +
-			"1:128169..128169, 128175..128175\n" +
-			"0->1 EPSILON 0,0,0\n" +
-			"0->3 EPSILON 0,0,0\n" +
-			"1->5 EPSILON 0,0,0\n" +
-			"3->7 EPSILON 0,0,0\n" +
-			"5->6 SET 1,0,0\n" +
-			"6->2 EPSILON 0,0,0\n" +
-			"7->8 SET 0,0,0\n" +
-			"8->4 EPSILON 0,0,0\n" +
-			"0:0\n";
+				"max type 2\n" +
+						"0:TOKEN_START -1\n" +
+						"1:RULE_START 0\n" +
+						"2:RULE_STOP 0\n" +
+						"3:RULE_START 1\n" +
+						"4:RULE_STOP 1\n" +
+						"5:BASIC 0\n" +
+						"6:BASIC 0\n" +
+						"7:BASIC 1\n" +
+						"8:BASIC 1\n" +
+						"rule 0:1 1\n" +
+						"rule 1:3 2\n" +
+						"mode 0:0\n" +
+						"0:128169..128169, 128175..128175\n" +
+						"1:'a'..'a', 'x'..'x'\n" +
+						"0->1 EPSILON 0,0,0\n" +
+						"0->3 EPSILON 0,0,0\n" +
+						"1->5 EPSILON 0,0,0\n" +
+						"3->7 EPSILON 0,0,0\n" +
+						"5->6 SET 0,0,0\n" +
+						"6->2 EPSILON 0,0,0\n" +
+						"7->8 SET 1,0,0\n" +
+						"8->4 EPSILON 0,0,0\n" +
+						"0:0\n";
+		ATN atn = createATN(lg, true);
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
+		assertEquals(expecting, result);
+	}
+
+	@Test public void testLexerWith0xFFFCInSet() throws Exception {
+		LexerGrammar lg = new LexerGrammar(
+				"lexer grammar L;\n" +
+						"ID : ([A-Z_]|'Ā'..'\\uFFFC') ([A-Z_0-9]|'Ā'..'\\uFFFC')*; // FFFD+ are not valid char\n");
+		String expecting =
+				"max type 1\n" +
+				"0:TOKEN_START -1\n" +
+				"1:RULE_START 0\n" +
+				"2:RULE_STOP 0\n" +
+				"3:BASIC 0\n" +
+				"4:BLOCK_START 0 5\n" +
+				"5:BLOCK_END 0\n" +
+				"6:BASIC 0\n" +
+				"7:STAR_BLOCK_START 0 8\n" +
+				"8:BLOCK_END 0\n" +
+				"9:STAR_LOOP_ENTRY 0\n" +
+				"10:LOOP_END 0 11\n" +
+				"11:STAR_LOOP_BACK 0\n" +
+				"rule 0:1 1\n" +
+				"mode 0:0\n" +
+				"0:'A'..'Z', '_'..'_', '\\u0100'..'\\uFFFC'\n" +
+				"1:'0'..'9', 'A'..'Z', '_'..'_', '\\u0100'..'\\uFFFC'\n" +
+				"0->1 EPSILON 0,0,0\n" +
+				"1->4 EPSILON 0,0,0\n" +
+				"3->5 SET 0,0,0\n" +
+				"4->3 EPSILON 0,0,0\n" +
+				"5->9 EPSILON 0,0,0\n" +
+				"6->8 SET 1,0,0\n" +
+				"7->6 EPSILON 0,0,0\n" +
+				"8->11 EPSILON 0,0,0\n" +
+				"9->7 EPSILON 0,0,0\n" +
+				"9->10 EPSILON 0,0,0\n" +
+				"10->2 EPSILON 0,0,0\n" +
+				"11->9 EPSILON 0,0,0\n" +
+				"0:0\n" +
+				"1:4\n" +
+				"2:7\n" +
+				"3:9\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -397,7 +430,8 @@ public void testSetUp() throws Exception {
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -420,7 +454,8 @@ public void testSetUp() throws Exception {
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -445,7 +480,8 @@ public void testSetUp() throws Exception {
 				"5->2 EPSILON 0,0,0\n" +
 				"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -474,7 +510,8 @@ public void testSetUp() throws Exception {
 				"0:0\n" +
 				"1:5\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -505,7 +542,8 @@ public void testSetUp() throws Exception {
 				"0:0\n" +
 				"1:6\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -552,7 +590,8 @@ public void testSetUp() throws Exception {
 				"14->6 EPSILON 0,0,0\n" +
 				"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -575,9 +614,7 @@ public void testSetUp() throws Exception {
 			"3->4 NOT_SET 0,0,0\n" +
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
-		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
-		assertEquals(expecting, result);
+		checkResults(lg, expecting);
 	}
 
 	@Test public void testLexerSetWithRange() throws Exception {
@@ -600,7 +637,8 @@ public void testSetUp() throws Exception {
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -624,7 +662,8 @@ public void testSetUp() throws Exception {
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -648,7 +687,8 @@ public void testSetUp() throws Exception {
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -672,7 +712,8 @@ public void testSetUp() throws Exception {
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -696,7 +737,8 @@ public void testSetUp() throws Exception {
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -720,7 +762,8 @@ public void testSetUp() throws Exception {
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -744,7 +787,8 @@ public void testSetUp() throws Exception {
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -768,7 +812,8 @@ public void testSetUp() throws Exception {
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -792,7 +837,8 @@ public void testSetUp() throws Exception {
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -816,7 +862,8 @@ public void testSetUp() throws Exception {
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -840,7 +887,8 @@ public void testSetUp() throws Exception {
 			"4->2 EPSILON 0,0,0\n" +
 			"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -903,7 +951,8 @@ public void testSetUp() throws Exception {
 				"1:1\n" +
 				"2:11\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -930,7 +979,8 @@ public void testSetUp() throws Exception {
 				"5->2 EPSILON 0,0,0\n" +
 				"0:0\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -987,7 +1037,8 @@ public void testSetUp() throws Exception {
 			"0:0\n" +
 			"1:1\n";
 		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
 		assertEquals(expecting, result);
 	}
 
@@ -1037,14 +1088,23 @@ public void testSetUp() throws Exception {
 			"0:0\n" +
 			"1:1\n" +
 			"2:2\n";
-		ATN atn = createATN(lg, true);
-		String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
-		assertEquals(expecting, result);
+		checkResults(lg, expecting);
 	}
 
-	public static String getDecoded(ATN atn, List<String> tokenNames) {
-		IntegerList serialized = ATNSerializer.getSerialized(atn, "Java");
-		char[] data = Utils.toCharArray(serialized);
-		return new ATNSerializer(atn, tokenNames).decode(data);
+	/** Asserts that the ATN built for {@code g} describes as {@code expecting} and that its ints survive the 16-bit-word encode/decode round trip. */
+	private void checkResults(Grammar g, String expecting) {
+		ATN atn = createATN(g, true);
+		IntegerList ints = ATNSerializer.getSerialized(atn);
+		ATNDescriber describer = new ATNDescriber(atn, Arrays.asList(g.getTokenNames()));
+		assertEquals(expecting, describer.decode(ints.toArray()));
+
+		// pack into 16-bit words, narrow each word to a char, then decode back to ints
+		int[] words = encodeIntsWith16BitWords(ints).toArray();
+		char[] packed = new char[words.length];
+		for (int k = 0; k < words.length; k++) {
+			packed[k] = (char) words[k];
+		}
+		int[] roundTripped = decodeIntsEncodedAs16BitWords(packed, true);
+		assertArrayEquals(ints.toArray(), roundTripped);
+	}
 }
diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/CharSupportTest.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestCharSupport.java
similarity index 99%
rename from tool-testsuite/test/org/antlr/v4/test/tool/CharSupportTest.java
rename to tool-testsuite/test/org/antlr/v4/test/tool/TestCharSupport.java
index e9ceab2ae4..eb3ba3ad18 100644
--- a/tool-testsuite/test/org/antlr/v4/test/tool/CharSupportTest.java
+++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestCharSupport.java
@@ -11,8 +11,7 @@
 import org.junit.Assert;
 import org.junit.Test;
 
-public class CharSupportTest {
-
+public class TestCharSupport {
 	@Test
 	public void testGetANTLRCharLiteralForChar() {
 		Assert.assertEquals("'<INVALID>'",
diff --git a/tool-testsuite/test/org/antlr/v4/misc/UtilsTest.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestUtils.java
similarity index 97%
rename from tool-testsuite/test/org/antlr/v4/misc/UtilsTest.java
rename to tool-testsuite/test/org/antlr/v4/test/tool/TestUtils.java
index 0c008224f8..c9ba151888 100644
--- a/tool-testsuite/test/org/antlr/v4/misc/UtilsTest.java
+++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestUtils.java
@@ -1,15 +1,14 @@
-package org.antlr.v4.misc;
+package org.antlr.v4.test.tool;
 
 import org.antlr.runtime.Token;
-import org.antlr.v4.codegen.CodeGenerator;
+import org.antlr.v4.misc.Utils;
 import org.antlr.v4.tool.ast.GrammarAST;
 import org.junit.Assert;
 import org.junit.Test;
 
 import java.util.ArrayList;
 
-public class UtilsTest {
-
+public class TestUtils {
 	@Test
 	public void testStripFileExtension() {
 		Assert.assertNull(Utils.stripFileExtension(null));
diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg
index 1bb19827bc..9ac3fea418 100644
--- a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg
+++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg
@@ -79,7 +79,7 @@ public:
 
   const antlr4::dfa::Vocabulary& getVocabulary() const override;
 
-  virtual const std::vector\<uint16_t>& getSerializedATN() const override;
+  virtual const std::vector\<int32_t>& getSerializedATN() const override;
   virtual const antlr4::atn::ATN& getATN() const override;
 
   <if (actionFuncs)>
@@ -138,7 +138,7 @@ struct <lexer.name; format = "cap">StaticData final {
   const std::vector\<std::string> literalNames;
   const std::vector\<std::string> symbolicNames;
   const antlr4::dfa::Vocabulary vocabulary;
-  std::vector\<uint16_t> serializedATN;
+  std::vector\<int32_t> serializedATN;
   std::unique_ptr\<antlr4::atn::ATN> atn;
 };
 
@@ -199,7 +199,7 @@ const dfa::Vocabulary& <lexer.name>::getVocabulary() const {
   return <lexer.grammarName; format = "lower">LexerStaticData->vocabulary;
 }
 
-const std::vector\<uint16_t>& <lexer.name>::getSerializedATN() const {
+const std::vector\<int32_t>& <lexer.name>::getSerializedATN() const {
   return <lexer.grammarName; format = "lower">LexerStaticData->serializedATN;
 }
 
@@ -307,7 +307,7 @@ public:
 
   const antlr4::dfa::Vocabulary& getVocabulary() const override;
 
-  const std::vector\<uint16_t>& getSerializedATN() const override;
+  const std::vector\<int32_t>& getSerializedATN() const override;
 
   <namedActions.members>
 
@@ -356,7 +356,7 @@ struct <parser.name; format = "cap">StaticData final {
   const std::vector\<std::string> literalNames;
   const std::vector\<std::string> symbolicNames;
   const antlr4::dfa::Vocabulary vocabulary;
-  std::vector\<uint16_t> serializedATN;
+  std::vector\<int32_t> serializedATN;
   std::unique_ptr\<antlr4::atn::ATN> atn;
 };
 
@@ -407,7 +407,7 @@ const dfa::Vocabulary& <parser.name>::getVocabulary() const {
   return <parser.grammarName; format = "lower">ParserStaticData->vocabulary;
 }
 
-const std::vector\<uint16_t>& <parser.name>::getSerializedATN() const {
+const std::vector\<int32_t>& <parser.name>::getSerializedATN() const {
   return <parser.grammarName; format = "lower">ParserStaticData->serializedATN;
 }
 
@@ -438,7 +438,7 @@ SerializedATNHeader(model) ::= <<
 >>
 
 SerializedATN(model) ::= <<
-static const uint16_t serializedATNSegment[] = {
+static const int32_t serializedATNSegment[] = {
 	<model.serialized: {s | <s>}; separator=",", wrap>
 };
 staticData->serializedATN.reserve(sizeof(serializedATNSegment) / sizeof(serializedATNSegment[0]));
diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg
index f95fc8e34f..a4d4b52dce 100644
--- a/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg
+++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg
@@ -151,7 +151,7 @@ type <parser.name> struct {
 
 var <parser.grammarName; format="lower">ParserStaticData struct {
   once                   sync.Once
-  serializedATN          []uint16
+  serializedATN          []int32
   literalNames           []string
   symbolicNames          []string
   ruleNames              []string
@@ -180,7 +180,7 @@ func <parser.grammarName; format="lower">ParserInit() {
   staticData.predictionContextCache = antlr.NewPredictionContextCache()
   staticData.serializedATN = <atn>
   deserializer := antlr.NewATNDeserializer(nil)
-  staticData.atn = deserializer.DeserializeFromUInt16(staticData.serializedATN)
+  staticData.atn = deserializer.Deserialize(staticData.serializedATN)
   atn := staticData.atn
   staticData.decisionToDFA = make([]*antlr.DFA, len(atn.DecisionToState))
   decisionToDFA := staticData.decisionToDFA
@@ -1424,7 +1424,7 @@ type <lexer.name> struct {
 
 var <lexer.grammarName; format="lower">LexerStaticData struct {
   once                   sync.Once
-  serializedATN          []uint16
+  serializedATN          []int32
   channelNames           []string
   modeNames              []string
   literalNames           []string
@@ -1461,7 +1461,7 @@ func <lexer.grammarName; format="lower">LexerInit() {
   staticData.predictionContextCache = antlr.NewPredictionContextCache()
   staticData.serializedATN = <atn>
   deserializer := antlr.NewATNDeserializer(nil)
-  staticData.atn = deserializer.DeserializeFromUInt16(staticData.serializedATN)
+  staticData.atn = deserializer.Deserialize(staticData.serializedATN)
   atn := staticData.atn
   staticData.decisionToDFA = make([]*antlr.DFA, len(atn.DecisionToState))
   decisionToDFA := staticData.decisionToDFA
@@ -1541,7 +1541,7 @@ const <lexer.name><first(rest(lexer.escapedModeNames))> = 1
 >>
 
 SerializedATN(model) ::= <<
-<if(model.serialized)>[]uint16{
+<if(model.serialized)>[]int32{
 	<model.serialized; separator=", ", wrap>,
 }<endif>
 >>
diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg
index 47aafe4afa..5aad297348 100644
--- a/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg
+++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg
@@ -964,7 +964,7 @@ public class <lexer.name> extends <superClass; null="Lexer"> {
 }
 >>
 
-SerializedATN(model) ::= <<
+SerializedJavaATN(model) ::= <<
 <if(rest(model.segments))>
 <! requires segmented representation !>
 <model.segments:{segment|private static final String _serializedATNSegment<i0> =
diff --git a/tool/src/org/antlr/v4/Tool.java b/tool/src/org/antlr/v4/Tool.java
index e55cfab8ba..0b13ea23c4 100644
--- a/tool/src/org/antlr/v4/Tool.java
+++ b/tool/src/org/antlr/v4/Tool.java
@@ -738,10 +738,10 @@ public static String generateInterpreterData(Grammar g) {
 		}
 		content.append("\n");
 
-		IntegerList serializedATN = ATNSerializer.getSerialized(g.atn, g.getLanguage());
+		IntegerList serializedATN = ATNSerializer.getSerialized(g.atn);
 		// Uncomment if you'd like to write out histogram info on the numbers of
 		// each integer value:
-		// Utils.writeSerializedATNIntegerHistogram(g.name+"-histo.csv", serializedATN);
+		//Utils.writeSerializedATNIntegerHistogram(g.name+"-histo.csv", serializedATN);
 
 		content.append("atn:\n");
 		content.append(serializedATN.toString());
diff --git a/tool/src/org/antlr/v4/codegen/Target.java b/tool/src/org/antlr/v4/codegen/Target.java
index 5fb03409c8..618d891a21 100644
--- a/tool/src/org/antlr/v4/codegen/Target.java
+++ b/tool/src/org/antlr/v4/codegen/Target.java
@@ -337,7 +337,7 @@ protected boolean shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(int cod
 	}
 
 	/** Assume 16-bit char */
-	public String encodeIntAsCharEscape(int v) {
+	public String encodeInt16AsCharEscape(int v) {
 		if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
 			throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
 		}
@@ -505,7 +505,7 @@ public String getBaseVisitorFileName(boolean header) {
 	 * in a single segment (a declaration in target language) of the serialized ATN.
 	 * E.g., in C++, a small segment length results in multiple decls like:
 	 *
-	 *   static const uint16_t serializedATNSegment1[] = {
+	 *   static const int32_t serializedATNSegment1[] = {
 	 *     0x7, 0x12, 0x2, 0x13, 0x7, 0x13, 0x2, 0x14, 0x7, 0x14, 0x2, 0x15, 0x7,
 	 *        0x15, 0x2, 0x16, 0x7, 0x16, 0x2, 0x17, 0x7, 0x17, 0x2, 0x18, 0x7,
 	 *        0x18, 0x2, 0x19, 0x7, 0x19, 0x2, 0x1a, 0x7, 0x1a, 0x2, 0x1b, 0x7,
diff --git a/tool/src/org/antlr/v4/codegen/model/Recognizer.java b/tool/src/org/antlr/v4/codegen/model/Recognizer.java
index 5b63bed4b2..8e07c29d2f 100644
--- a/tool/src/org/antlr/v4/codegen/model/Recognizer.java
+++ b/tool/src/org/antlr/v4/codegen/model/Recognizer.java
@@ -9,6 +9,7 @@
 import org.antlr.v4.codegen.OutputModelFactory;
 import org.antlr.v4.codegen.model.chunk.ActionChunk;
 import org.antlr.v4.codegen.model.chunk.ActionText;
+import org.antlr.v4.codegen.target.JavaTarget;
 import org.antlr.v4.tool.Grammar;
 import org.antlr.v4.tool.Rule;
 
@@ -49,6 +50,7 @@ public Recognizer(OutputModelFactory factory) {
 		super(factory);
 
 		Grammar g = factory.getGrammar();
+		CodeGenerator gen = factory.getGenerator();
 		grammarFileName = new File(g.fileName).getName();
 		grammarName = g.name;
 		name = g.getRecognizerName();
@@ -63,7 +65,12 @@ public Recognizer(OutputModelFactory factory) {
 
 		ruleNames = g.rules.keySet();
 		rules = g.rules.values();
-		atn = new SerializedATN(factory, g.atn);
+		if ( gen.getTarget() instanceof JavaTarget ) {
+			atn = new SerializedJavaATN(factory, g.atn);
+		}
+		else {
+			atn = new SerializedATN(factory, g.atn);
+		}
 		if (g.getOptionString("superClass") != null) {
 			superClass = new ActionText(null, g.getOptionString("superClass"));
 		}
@@ -71,7 +78,6 @@ public Recognizer(OutputModelFactory factory) {
 			superClass = null;
 		}
 
-		CodeGenerator gen = factory.getGenerator();
 		tokenNames = translateTokenStringsToTarget(g.getTokenDisplayNames(), gen);
 		literalNames = translateTokenStringsToTarget(g.getTokenLiteralNames(), gen);
 		symbolicNames = translateTokenStringsToTarget(g.getTokenSymbolicNames(), gen);
diff --git a/tool/src/org/antlr/v4/codegen/model/SerializedATN.java b/tool/src/org/antlr/v4/codegen/model/SerializedATN.java
index 366499bb84..ad01932ef3 100644
--- a/tool/src/org/antlr/v4/codegen/model/SerializedATN.java
+++ b/tool/src/org/antlr/v4/codegen/model/SerializedATN.java
@@ -12,32 +12,21 @@
 import org.antlr.v4.runtime.atn.ATNSerializer;
 import org.antlr.v4.runtime.misc.IntegerList;
 
+/** Represents a serialized ATN that is just a list of signed integers; works for all targets
+ *  except for Java, which requires a 16-bit char encoding. See {@link SerializedJavaATN}.
+ */
 public class SerializedATN extends OutputModelObject {
-	public final String[] serialized;
-	public final String[][] segments;
+	public int[] serialized; // stays null when the factory-only constructor below is used
 
-	public SerializedATN(OutputModelFactory factory, ATN atn) {
+	public SerializedATN(OutputModelFactory factory) { // serializes nothing; SerializedJavaATN calls this and builds its own encoding
 		super(factory);
-		Target target = factory.getGenerator().getTarget();
-		IntegerList data = ATNSerializer.getSerialized(atn, target.getLanguage());
-		int size = data.size();
-		int segmentLimit = target.getSerializedATNSegmentLimit();
-		segments = new String[(int)(((long)size + segmentLimit - 1) / segmentLimit)][];
-		int segmentIndex = 0;
-
-		for (int i = 0; i < size; i += segmentLimit) {
-			int segmentSize = Math.min(i + segmentLimit, size) - i;
-			String[] segment = new String[segmentSize];
-			segments[segmentIndex++] = segment;
-			for (int j = 0; j < segmentSize; j++) {
-				segment[j] = target.encodeIntAsCharEscape(data.get(i + j));
-			}
-		}
-
-		serialized = segments[0];
 	}
 
-	public String[][] getSegments() {
-		return segments;
+	public SerializedATN(OutputModelFactory factory, ATN atn) { // serializes atn and stores the raw ints
+		super(factory);
+		IntegerList data = ATNSerializer.getSerialized(atn);
+		serialized = data.toArray();
 	}
+
+	public Object getSerialized() { return serialized; } // declared as Object; SerializedJavaATN overrides this to return a String[]
 }
diff --git a/tool/src/org/antlr/v4/codegen/model/SerializedJavaATN.java b/tool/src/org/antlr/v4/codegen/model/SerializedJavaATN.java
new file mode 100644
index 0000000000..46d53e755e
--- /dev/null
+++ b/tool/src/org/antlr/v4/codegen/model/SerializedJavaATN.java
@@ -0,0 +1,40 @@
+package org.antlr.v4.codegen.model;
+
+import org.antlr.v4.codegen.OutputModelFactory;
+import org.antlr.v4.codegen.Target;
+import org.antlr.v4.runtime.atn.ATN;
+import org.antlr.v4.runtime.atn.ATNDeserializer;
+import org.antlr.v4.runtime.atn.ATNSerializer;
+import org.antlr.v4.runtime.misc.IntegerList;
+
+/** A serialized ATN for the Java target: values are re-encoded as 16-bit words, char-escaped into strings, and split into segments. */
+public class SerializedJavaATN extends SerializedATN {
+	private final String[] serializedAsString;
+	private final String[][] segments;
+
+	public SerializedJavaATN(OutputModelFactory factory, ATN atn) {
+		super(factory);
+		IntegerList data = ATNSerializer.getSerialized(atn);
+		data = ATNDeserializer.encodeIntsWith16BitWords(data); // re-encode as 16-bit words before each element is char-escaped below
+
+		int size = data.size();
+		Target target = factory.getGenerator().getTarget();
+		int segmentLimit = target.getSerializedATNSegmentLimit();
+		segments = new String[(int)(((long)size + segmentLimit - 1) / segmentLimit)][]; // ceil(size / segmentLimit), summed as long to avoid int overflow
+		int segmentIndex = 0;
+
+		for (int i = 0; i < size; i += segmentLimit) { // i is the start offset of each segment within data
+			int segmentSize = Math.min(i + segmentLimit, size) - i; // the final segment may be shorter than segmentLimit
+			String[] segment = new String[segmentSize];
+			segments[segmentIndex++] = segment;
+			for (int j = 0; j < segmentSize; j++) {
+				segment[j] = target.encodeInt16AsCharEscape(data.get(i + j));
+			}
+		}
+
+		serializedAsString = segments[0]; // first segment only; this is the whole ATN only when there is exactly one segment
+	}
+
+	public Object getSerialized() { return serializedAsString; } // returns the first segment's escaped strings, not the inherited int[]
+	public String[][] getSegments() { return segments; } // every segment, in order
+}
diff --git a/tool/src/org/antlr/v4/tool/Grammar.java b/tool/src/org/antlr/v4/tool/Grammar.java
index a64a825a0a..1406d004b1 100644
--- a/tool/src/org/antlr/v4/tool/Grammar.java
+++ b/tool/src/org/antlr/v4/tool/Grammar.java
@@ -29,10 +29,7 @@
 import org.antlr.v4.runtime.atn.ATNSerializer;
 import org.antlr.v4.runtime.atn.SemanticContext;
 import org.antlr.v4.runtime.dfa.DFA;
-import org.antlr.v4.runtime.misc.IntSet;
-import org.antlr.v4.runtime.misc.Interval;
-import org.antlr.v4.runtime.misc.IntervalSet;
-import org.antlr.v4.runtime.misc.Pair;
+import org.antlr.v4.runtime.misc.*;
 import org.antlr.v4.tool.ast.ActionAST;
 import org.antlr.v4.tool.ast.GrammarAST;
 import org.antlr.v4.tool.ast.GrammarASTWithOptions;
@@ -1322,13 +1319,22 @@ public LexerInterpreter createLexerInterpreter(CharStream input) {
 			return implicitLexer.createLexerInterpreter(input);
 		}
 
-		char[] serializedAtn = ATNSerializer.getSerializedAsChars(atn, getLanguage());
-		ATN deserialized = new ATNDeserializer().deserialize(serializedAtn);
 		List<String> allChannels = new ArrayList<String>();
 		allChannels.add("DEFAULT_TOKEN_CHANNEL");
 		allChannels.add("HIDDEN");
 		allChannels.addAll(channelValueToNameList);
-		return new LexerInterpreter(fileName, getVocabulary(), Arrays.asList(getRuleNames()), allChannels, ((LexerGrammar)this).modes.keySet(), deserialized, input);
+
+		// must run ATN through serializer to set some state flags
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		ATN deserializedATN = new ATNDeserializer().deserialize(serialized.toArray());
+		return new LexerInterpreter(
+				fileName,
+				getVocabulary(),
+				Arrays.asList(getRuleNames()),
+				allChannels,
+				((LexerGrammar)this).modes.keySet(),
+				deserializedATN,
+				input);
 	}
 
 	/** @since 4.5.1 */
@@ -1336,9 +1342,11 @@ public GrammarParserInterpreter createGrammarParserInterpreter(TokenStream token
 		if (this.isLexer()) {
 			throw new IllegalStateException("A parser interpreter can only be created for a parser or combined grammar.");
 		}
-		char[] serializedAtn = ATNSerializer.getSerializedAsChars(atn, getLanguage());
-		ATN deserialized = new ATNDeserializer().deserialize(serializedAtn);
-		return new GrammarParserInterpreter(this, deserialized, tokenStream);
+		// must run ATN through serializer to set some state flags
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		ATN deserializedATN = new ATNDeserializer().deserialize(serialized.toArray());
+
+		return new GrammarParserInterpreter(this, deserializedATN, tokenStream);
 	}
 
 	public ParserInterpreter createParserInterpreter(TokenStream tokenStream) {
@@ -1346,8 +1354,10 @@ public ParserInterpreter createParserInterpreter(TokenStream tokenStream) {
 			throw new IllegalStateException("A parser interpreter can only be created for a parser or combined grammar.");
 		}
 
-		char[] serializedAtn = ATNSerializer.getSerializedAsChars(atn, getLanguage());
-		ATN deserialized = new ATNDeserializer().deserialize(serializedAtn);
-		return new ParserInterpreter(fileName, getVocabulary(), Arrays.asList(getRuleNames()), deserialized, tokenStream);
+		// must run ATN through serializer to set some state flags
+		IntegerList serialized = ATNSerializer.getSerialized(atn);
+		ATN deserializedATN = new ATNDeserializer().deserialize(serialized.toArray());
+
+		return new ParserInterpreter(fileName, getVocabulary(), Arrays.asList(getRuleNames()), deserializedATN, tokenStream);
 	}
 }
diff --git a/tool/src/org/antlr/v4/tool/GrammarParserInterpreter.java b/tool/src/org/antlr/v4/tool/GrammarParserInterpreter.java
index effaac22e5..8c2ddecccc 100644
--- a/tool/src/org/antlr/v4/tool/GrammarParserInterpreter.java
+++ b/tool/src/org/antlr/v4/tool/GrammarParserInterpreter.java
@@ -24,6 +24,7 @@
 import org.antlr.v4.runtime.atn.PredictionMode;
 import org.antlr.v4.runtime.atn.RuleStartState;
 import org.antlr.v4.runtime.atn.StarLoopEntryState;
+import org.antlr.v4.runtime.misc.IntegerList;
 import org.antlr.v4.runtime.misc.Interval;
 import org.antlr.v4.runtime.tree.Trees;
 
@@ -401,12 +402,12 @@ public static ParserInterpreter deriveTempParserInterpreter(Grammar g, Parser or
 			}
 		}
 		else { // must've been a generated parser
-			char[] serializedAtn = ATNSerializer.getSerializedAsChars(originalParser.getATN(), g.getLanguage());
-			ATN deserialized = new ATNDeserializer().deserialize(serializedAtn);
+			// NOTE(review): the generated parser's ATN is handed to the interpreter as-is, with no
+			// serialize/deserialize round trip; confirm the interpreter never mutates this shared ATN.
 			parser = new ParserInterpreter(originalParser.getGrammarFileName(),
 										   originalParser.getVocabulary(),
 										   Arrays.asList(originalParser.getRuleNames()),
-										   deserialized,
+					                       originalParser.getATN(),
 										   tokens);
 		}