diff --git a/.travis.yml b/.travis.yml
index f9319f2ed..38f3a9803 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,6 @@
 sudo: required
-dist: precise
+dist: trusty
+group: edge
 
 language: cpp
 cache:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ceda71b1b..8b90c8705 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,7 +50,14 @@ if(CCACHE_FOUND)
 endif(CCACHE_FOUND)
 
 if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -Wall -Wextra -Werror")
+    if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "powerpc" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64le")
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=native") # POWER targets select the CPU with -mcpu= rather than -march=
+    else()
+      #FIXME: -march=native is the x86 spelling and is not guaranteed to be accepted on every architecture; it is kept for all non-POWER targets to preserve the project's existing behavior.
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
+    endif()
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror")
+    set(EXTRA_CXX_FLAGS -Weffc++ -Wswitch-default -Wfloat-equal -Wconversion -Wsign-conversion)
     if (RAPIDJSON_BUILD_CXX11)
         if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.7.0")
             set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
@@ -73,7 +80,14 @@ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
         endif()
     endif()
 elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -Wall -Wextra -Werror -Wno-missing-field-initializers")
+    if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "powerpc" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64le")
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=native") # POWER targets select the CPU with -mcpu= rather than -march=
+    else()
+      #FIXME: -march=native is the x86 spelling and is not guaranteed to be accepted on every architecture; it is kept for all non-POWER targets to preserve the project's existing behavior.
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
+    endif()
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror -Wno-missing-field-initializers")
+    set(EXTRA_CXX_FLAGS -Weffc++ -Wswitch-default -Wfloat-equal -Wconversion -Wimplicit-fallthrough)
     if (RAPIDJSON_BUILD_CXX11)
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
     endif()
@@ -90,6 +104,8 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
 elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
     add_definitions(-D_CRT_SECURE_NO_WARNINGS=1)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
+elseif (CMAKE_CXX_COMPILER_ID MATCHES "XL")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -qarch=auto")
 endif()
 
 #add extra search paths for libraries and includes
@@ -157,15 +173,35 @@ install(DIRECTORY example/
 # Provide config and version files to be used by other applications
 # ===============================
 
-export(PACKAGE ${PROJECT_NAME})
-
-# cmake-modules
-CONFIGURE_FILE(${PROJECT_NAME}Config.cmake.in
-    ${PROJECT_NAME}Config.cmake
-    @ONLY)
-CONFIGURE_FILE(${PROJECT_NAME}ConfigVersion.cmake.in
-    ${PROJECT_NAME}ConfigVersion.cmake
-    @ONLY)
+################################################################################
+# Export package for use from the build tree
+EXPORT( PACKAGE ${PROJECT_NAME} )
+
+# Create the RapidJSONConfig.cmake file for other cmake projects ... for the build tree (include dir is resolved through the config's own <Project>_SOURCE_DIR; assumes headers live in <source>/include).
+SET( CONFIG_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+SET( CONFIG_DIR ${CMAKE_CURRENT_BINARY_DIR})
+SET( ${PROJECT_NAME}_INCLUDE_DIR "\${${PROJECT_NAME}_SOURCE_DIR}/include" )
+CONFIGURE_FILE( ${CMAKE_CURRENT_SOURCE_DIR}/${PROJECT_NAME}Config.cmake.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake @ONLY )
+CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/${PROJECT_NAME}ConfigVersion.cmake.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake @ONLY)
+
+# ... for the install tree: the include dir is stored relative to the installed config file's directory
+SET( CMAKECONFIG_INSTALL_DIR lib/cmake/${PROJECT_NAME} )
+FILE( RELATIVE_PATH REL_INCLUDE_DIR
+    "${CMAKE_INSTALL_PREFIX}/${CMAKECONFIG_INSTALL_DIR}"
+    "${CMAKE_INSTALL_PREFIX}/include" )
+
+SET( ${PROJECT_NAME}_INCLUDE_DIR "\${${PROJECT_NAME}_CMAKE_DIR}/${REL_INCLUDE_DIR}" )
+SET( CONFIG_SOURCE_DIR ) # unset, so the installed config carries no source-tree path
+SET( CONFIG_DIR ) # unset, so the installed config carries no build-tree path
+CONFIGURE_FILE( ${CMAKE_CURRENT_SOURCE_DIR}/${PROJECT_NAME}Config.cmake.in
+    ${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${PROJECT_NAME}Config.cmake @ONLY )
+
+INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${PROJECT_NAME}Config.cmake"
+        DESTINATION ${CMAKECONFIG_INSTALL_DIR} )
+
+# Install files
 INSTALL(FILES
     ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
     ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
diff --git a/RapidJSONConfig.cmake.in b/RapidJSONConfig.cmake.in
index 9fa12186a..e3c65a541 100644
--- a/RapidJSONConfig.cmake.in
+++ b/RapidJSONConfig.cmake.in
@@ -1,3 +1,15 @@
-get_filename_component(RAPIDJSON_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
-set(RAPIDJSON_INCLUDE_DIRS "@INCLUDE_INSTALL_DIR@")
-message(STATUS "RapidJSON found. Headers: ${RAPIDJSON_INCLUDE_DIRS}")
+################################################################################
+# RapidJSON source dir
+set( RapidJSON_SOURCE_DIR "@CONFIG_SOURCE_DIR@")
+
+################################################################################
+# RapidJSON build dir
+set( RapidJSON_DIR "@CONFIG_DIR@")
+
+################################################################################
+# Compute paths
+get_filename_component(RapidJSON_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
+
+set( RapidJSON_INCLUDE_DIR  "@RapidJSON_INCLUDE_DIR@" )
+set( RapidJSON_INCLUDE_DIRS  "@RapidJSON_INCLUDE_DIR@" )
+message(STATUS "RapidJSON found. Headers: ${RapidJSON_INCLUDE_DIRS}")
diff --git a/bin/types/booleans.json b/bin/types/booleans.json
old mode 100755
new mode 100644
diff --git a/bin/types/floats.json b/bin/types/floats.json
old mode 100755
new mode 100644
diff --git a/bin/types/guids.json b/bin/types/guids.json
old mode 100755
new mode 100644
diff --git a/bin/types/integers.json b/bin/types/integers.json
old mode 100755
new mode 100644
diff --git a/bin/types/mixed.json b/bin/types/mixed.json
old mode 100755
new mode 100644
diff --git a/bin/types/nulls.json b/bin/types/nulls.json
old mode 100755
new mode 100644
diff --git a/bin/types/paragraphs.json b/bin/types/paragraphs.json
old mode 100755
new mode 100644
diff --git a/contrib/natvis/LICENSE b/contrib/natvis/LICENSE
new file mode 100644
index 000000000..f57da96cf
--- /dev/null
+++ b/contrib/natvis/LICENSE
@@ -0,0 +1,45 @@
+The MIT License (MIT)
+
+Copyright (c) 2017 Bart Muzzin
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+Derived from:
+
+The MIT License (MIT)
+
+Copyright (c) 2015 mojmir svoboda
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/contrib/natvis/README.md b/contrib/natvis/README.md
new file mode 100644
index 000000000..9685c7f7c
--- /dev/null
+++ b/contrib/natvis/README.md
@@ -0,0 +1,7 @@
+# rapidjson.natvis
+
+This file can be used as a [Visual Studio Visualizer](https://docs.microsoft.com/en-gb/visualstudio/debugger/create-custom-views-of-native-objects) to aid in visualizing rapidjson structures within the Visual Studio debugger. Natvis visualizers are supported in Visual Studio 2012 and later. To install, copy the file into this directory:
+
+`%USERPROFILE%\Documents\Visual Studio 2012\Visualizers`
+
+Each version of Visual Studio has a similar directory; the file must be copied into each version's directory to be used with that version. In Visual Studio 2015 and later, this can be done without restarting Visual Studio (though a new debugging session must be started).
diff --git a/contrib/natvis/rapidjson.natvis b/contrib/natvis/rapidjson.natvis
new file mode 100644
index 000000000..a804b7bf6
--- /dev/null
+++ b/contrib/natvis/rapidjson.natvis
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="utf-8"?>
+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+	<!-- rapidjson::GenericValue - basic support -->
+	<Type Name="rapidjson::GenericValue&lt;*,*&gt;">
+		<DisplayString Condition="(data_.f.flags &amp; kTypeMask) == kNullType">null</DisplayString>
+		<DisplayString Condition="data_.f.flags == kTrueFlag">true</DisplayString>
+		<DisplayString Condition="data_.f.flags == kFalseFlag">false</DisplayString>
+		<DisplayString Condition="data_.f.flags == kShortStringFlag">{data_.ss.str}</DisplayString>
+		<DisplayString Condition="(data_.f.flags &amp; kTypeMask) == kStringType">{(const char*)((size_t)data_.s.str &amp; 0x0000FFFFFFFFFFFF)}</DisplayString>
+		<DisplayString Condition="(data_.f.flags &amp; kNumberIntFlag) == kNumberIntFlag">{data_.n.i.i}</DisplayString>
+		<DisplayString Condition="(data_.f.flags &amp; kNumberUintFlag) == kNumberUintFlag">{data_.n.u.u}</DisplayString>
+		<DisplayString Condition="(data_.f.flags &amp; kNumberInt64Flag) == kNumberInt64Flag">{data_.n.i64}</DisplayString>
+		<DisplayString Condition="(data_.f.flags &amp; kNumberUint64Flag) == kNumberUint64Flag">{data_.n.u64}</DisplayString>
+		<DisplayString Condition="(data_.f.flags &amp; kNumberDoubleFlag) == kNumberDoubleFlag">{data_.n.d}</DisplayString>
+		<DisplayString Condition="data_.f.flags == kObjectType">Object members={data_.o.size}</DisplayString>
+		<DisplayString Condition="data_.f.flags == kArrayType">Array members={data_.a.size}</DisplayString>
+		<Expand>
+			<Item Condition="data_.f.flags == kObjectType" Name="[size]">data_.o.size</Item>
+			<Item Condition="data_.f.flags == kObjectType" Name="[capacity]">data_.o.capacity</Item>
+			<ArrayItems Condition="data_.f.flags == kObjectType">
+				<Size>data_.o.size</Size>
+				<!-- NOTE: Rapidjson stores some extra data in the high bits of pointers, which is why the mask -->
+				<ValuePointer>(rapidjson::GenericMember&lt;$T1,$T2&gt;*)(((size_t)data_.o.members) &amp; 0x0000FFFFFFFFFFFF)</ValuePointer>
+			</ArrayItems>
+
+			<Item Condition="data_.f.flags == kArrayType" Name="[size]">data_.a.size</Item>
+			<Item Condition="data_.f.flags == kArrayType" Name="[capacity]">data_.a.capacity</Item>
+			<ArrayItems Condition="data_.f.flags == kArrayType">
+				<Size>data_.a.size</Size>
+				<!-- NOTE: Rapidjson stores some extra data in the high bits of pointers, which is why the mask -->
+				<ValuePointer>(rapidjson::GenericValue&lt;$T1,$T2&gt;*)(((size_t)data_.a.elements) &amp; 0x0000FFFFFFFFFFFF)</ValuePointer>
+			</ArrayItems>
+
+		</Expand>
+	</Type>
+
+</AutoVisualizer>
+
diff --git a/doc/Doxyfile.zh-cn.in b/doc/Doxyfile.zh-cn.in
index 87dd8661b..e7fffa678 100644
--- a/doc/Doxyfile.zh-cn.in
+++ b/doc/Doxyfile.zh-cn.in
@@ -777,7 +777,7 @@ INPUT                  = readme.zh-cn.md \
                          doc/sax.zh-cn.md \
                          doc/schema.zh-cn.md \
                          doc/performance.zh-cn.md \
-                         doc/internals.md \
+                         doc/internals.zh-cn.md \
                          doc/faq.zh-cn.md
 
 # This tag can be used to specify the character encoding of the source files
diff --git a/doc/diagram/move2.dot b/doc/diagram/move2.dot
index 7037ea6cb..2319871b9 100644
--- a/doc/diagram/move2.dot
+++ b/doc/diagram/move2.dot
@@ -18,7 +18,7 @@ digraph {
 
 		node [shape=Mrecord, style=filled, colorscheme=spectral7]
 
-		c1 [label="{contact:array|}", fillcolor=4]
+		c1 [label="{contacts:array|}", fillcolor=4]
 		c11 [label="{|}"]
 		c12 [label="{|}"]
 		c13 [shape="none", label="...", style="solid"]
@@ -41,13 +41,13 @@ digraph {
 
 		node [shape=Mrecord, style=filled, colorscheme=spectral7]
 
-		c2 [label="{contact:array|}", fillcolor=4]
+		c2 [label="{contacts:array|}", fillcolor=4]
 		c3 [label="{array|}", fillcolor=4]
 		c21 [label="{|}"]
 		c22 [label="{|}"]
 		c23 [shape=none, label="...", style="solid"]
 		o2 [label="{o:object|}", fillcolor=3]
-		cs [label="{string|\"contact\"}", fillcolor=5]
+		cs [label="{string|\"contacts\"}", fillcolor=5]
 		c31 [label="{|}"]
 		c32 [label="{|}"]
 		c33 [shape="none", label="...", style="solid"]
@@ -59,4 +59,4 @@ digraph {
 		c3 -> { c31; c32; c33 }
 	}
 	ghost -> o2 [style=invis]
-}
\ No newline at end of file
+}
diff --git a/doc/diagram/move3.dot b/doc/diagram/move3.dot
index c197b99df..57adb4f9d 100644
--- a/doc/diagram/move3.dot
+++ b/doc/diagram/move3.dot
@@ -19,7 +19,7 @@ digraph {
 
 		node [shape=Mrecord, style=filled, colorscheme=spectral7]
 
-		c1 [label="{contact:array|}", fillcolor=4]
+		c1 [label="{contacts:array|}", fillcolor=4]
 		c11 [label="{|}"]
 		c12 [label="{|}"]
 		c13 [shape=none, label="...", style="solid"]
@@ -42,13 +42,13 @@ digraph {
 
 		node [shape=Mrecord, style=filled, colorscheme=spectral7]
 
-		c2 [label="{contact:null|}", fillcolor=1]
+		c2 [label="{contacts:null|}", fillcolor=1]
 		c3 [label="{array|}", fillcolor=4]
 		c21 [label="{|}"]
 		c22 [label="{|}"]
 		c23 [shape="none", label="...", style="solid"]
 		o2 [label="{o:object|}", fillcolor=3]
-		cs [label="{string|\"contact\"}", fillcolor=5]
+		cs [label="{string|\"contacts\"}", fillcolor=5]
 		c2 -> o2 [style="dashed", constraint=false, label="AddMember", style=invis]
 
 		edge [arrowhead=vee]
@@ -57,4 +57,4 @@ digraph {
 		cs -> c3 [arrowhead=none]
 	}
 	ghost -> o2 [style=invis]
-}
\ No newline at end of file
+}
diff --git a/doc/encoding.zh-cn.md b/doc/encoding.zh-cn.md
index 681692355..808ba525f 100644
--- a/doc/encoding.zh-cn.md
+++ b/doc/encoding.zh-cn.md
@@ -14,7 +14,7 @@
 
 > (in §6) JSON may be represented using UTF-8, UTF-16, or UTF-32. When JSON is written in UTF-8, JSON is 8bit compatible.  When JSON is written in UTF-16 or UTF-32, the binary content-transfer-encoding must be used.
 > 
-> 翻译：JSON 可使用 UTF-8、UTF-16 或 UTF-18 表示。当 JSON 以 UTF-8 写入，该 JSON 是 8 位兼容的。当 JSON 以 UTF-16 或 UTF-32 写入，就必须使用二进制的内容传送编码。
+> 翻译：JSON 可使用 UTF-8、UTF-16 或 UTF-32 表示。当 JSON 以 UTF-8 写入，该 JSON 是 8 位兼容的。当 JSON 以 UTF-16 或 UTF-32 写入，就必须使用二进制的内容传送编码。
 
 RapidJSON 支持多种编码。它也能检查 JSON 的编码，以及在不同编码中进行转码。所有这些功能都是在内部实现，无需使用外部的程序库（如 [ICU](http://site.icu-project.org/)）。
 
diff --git a/doc/faq.md b/doc/faq.md
index 1b0541c27..4946cfeff 100644
--- a/doc/faq.md
+++ b/doc/faq.md
@@ -256,7 +256,7 @@ Alternatively, if we don't want to explicitly refer to the root value of `addres
 
 3. What is SIMD? How it is applied in RapidJSON?
 
-   [SIMD](http://en.wikipedia.org/wiki/SIMD) instructions can perform parallel computation in modern CPUs. RapidJSON support Intel's SSE2/SSE4.2 to accelerate whitespace skipping. This improves performance of parsing indent formatted JSON. Define `RAPIDJSON_SSE2` or `RAPIDJSON_SSE42` macro to enable this feature. However, running the executable on a machine without such instruction set support will make it crash.
+   [SIMD](http://en.wikipedia.org/wiki/SIMD) instructions can perform parallel computation in modern CPUs. RapidJSON supports Intel's SSE2/SSE4.2 and ARM's Neon to accelerate the skipping of whitespace characters (space, tab, carriage return and line feed). This improves the performance of parsing indent-formatted JSON. Define the `RAPIDJSON_SSE2`, `RAPIDJSON_SSE42` or `RAPIDJSON_NEON` macro to enable this feature. However, running the executable on a machine without such instruction set support will make it crash.
 
 4. Does it consume a lot of memory?
 
diff --git a/doc/faq.zh-cn.md b/doc/faq.zh-cn.md
index f12d83073..307b02f9d 100644
--- a/doc/faq.zh-cn.md
+++ b/doc/faq.zh-cn.md
@@ -257,7 +257,7 @@
 
 3. 什是是 SIMD？它如何用于 RapidJSON？
 
-   [SIMD](http://en.wikipedia.org/wiki/SIMD) 指令可以在现代 CPU 中执行并行运算。RapidJSON 支持了 Intel 的 SSE2/SSE4.2 去加速跳过空白字符。在解析含缩进的 JSON 时，这能提升性能。只要定义名为 `RAPIDJSON_SSE2` 或 `RAPIDJSON_SSE42` 的宏，就能启动这个功能。然而，若在不支持这些指令集的机器上执行这些可执行文件，会导致崩溃。
+   [SIMD](http://en.wikipedia.org/wiki/SIMD) 指令可以在现代 CPU 中执行并行运算。RapidJSON 支持使用 Intel 的 SSE2/SSE4.2 和 ARM 的 Neon 来加速对空白符、制表符、回车符和换行符的过滤处理。在解析含缩进的 JSON 时，这能提升性能。只要定义名为 `RAPIDJSON_SSE2` ，`RAPIDJSON_SSE42` 或 `RAPIDJSON_NEON` 的宏，就能启动这个功能。然而，若在不支持这些指令集的机器上执行这些可执行文件，会导致崩溃。
 
 4. 它会消耗许多内存么？
 
diff --git a/doc/internals.md b/doc/internals.md
index 49802a0fd..2fff2d9cb 100644
--- a/doc/internals.md
+++ b/doc/internals.md
@@ -183,17 +183,20 @@ void SkipWhitespace(InputStream& s) {
 
 However, this requires 4 comparisons and a few branching for each character. This was found to be a hot spot.
 
-To accelerate this process, SIMD was applied to compare 16 characters with 4 white spaces for each iteration. Currently RapidJSON only supports SSE2 and SSE4.2 instructions for this. And it is only activated for UTF-8 memory streams, including string stream or *in situ* parsing. 
+To accelerate this process, SIMD was applied to compare 16 characters with 4 white spaces for each iteration. Currently RapidJSON supports SSE2, SSE4.2 and ARM Neon instructions for this. And it is only activated for UTF-8 memory streams, including string stream or *in situ* parsing.
 
-To enable this optimization, need to define `RAPIDJSON_SSE2` or `RAPIDJSON_SSE42` before including `rapidjson.h`. Some compilers can detect the setting, as in `perftest.h`:
+To enable this optimization, you need to define `RAPIDJSON_SSE2`, `RAPIDJSON_SSE42` or `RAPIDJSON_NEON` before including `rapidjson.h`. Some compilers can detect the setting, as in `perftest.h`:
 
 ~~~cpp
 // __SSE2__ and __SSE4_2__ are recognized by gcc, clang, and the Intel compiler.
 // We use -march=native with gmake to enable -msse2 and -msse4.2, if supported.
+// Likewise, __ARM_NEON is used to detect Neon.
 #if defined(__SSE4_2__)
 #  define RAPIDJSON_SSE42
 #elif defined(__SSE2__)
 #  define RAPIDJSON_SSE2
+#elif defined(__ARM_NEON)
+#  define RAPIDJSON_NEON
 #endif
 ~~~
 
diff --git a/doc/internals.zh-cn.md b/doc/internals.zh-cn.md
new file mode 100644
index 000000000..0c8bc068a
--- /dev/null
+++ b/doc/internals.zh-cn.md
@@ -0,0 +1,363 @@
+# 内部架构
+
+本部分记录了一些设计和实现细节。
+
+[TOC]
+
+# 架构 {#Architecture}
+
+## SAX 和 DOM
+
+下面的 UML 图显示了 SAX 和 DOM 的基本关系。
+
+![架构 UML 类图](diagram/architecture.png)
+
+关系的核心是 `Handler` 概念。在 SAX 一边，`Reader` 从流解析 JSON 并将事件发送到 `Handler`。`Writer` 实现了 `Handler` 概念，用于处理相同的事件。在 DOM 一边，`Document` 实现了 `Handler` 概念，用于通过这些事件来构建 DOM。`Value` 支持了 `Value::Accept(Handler&)` 函数，它可以将 DOM 转换为事件进行发送。
+
+在这个设计，SAX 是不依赖于 DOM 的。甚至 `Reader` 和 `Writer` 之间也没有依赖。这提供了连接事件发送器和处理器的灵活性。除此之外，`Value` 也是不依赖于 SAX 的。所以，除了将 DOM 序列化为 JSON 之外，用户也可以将其序列化为 XML，或者做任何其他事情。
+
+## 工具类
+
+SAX 和 DOM API 都依赖于3个额外的概念：`Allocator`、`Encoding` 和 `Stream`。它们的继承层次结构如下图所示。
+
+![工具类 UML 类图](diagram/utilityclass.png)
+
+# 值（Value） {#Value}
+
+`Value` （实际上被定义为 `GenericValue<UTF8<>>`）是 DOM API 的核心。本部分描述了它的设计。
+
+## 数据布局 {#DataLayout}
+
+`Value` 是[可变类型](http://en.wikipedia.org/wiki/Variant_type)。在 RapidJSON 的上下文中，一个 `Value` 的实例可以包含6种 JSON 数据类型之一。通过使用 `union` ，这是可能实现的。每一个 `Value` 包含两个成员：`union Data data_` 和 `unsigned flags_`。`flags_` 表明了 JSON 类型，以及附加的信息。
+
+下表显示了所有类型的数据布局。32位/64位列表明了字段所占用的字节数。
+
+| Null              |                                  | 32位 | 64位 |
+|-------------------|----------------------------------|:----:|:----:|
+| （未使用）        |                                  |4     |8     |
+| （未使用）        |                                  |4     |4     |
+| （未使用）        |                                  |4     |4     |
+| `unsigned flags_` | `kNullType kNullFlag`            |4     |4     |
+
+| Bool              |                                                    | 32位 | 64位 |
+|-------------------|----------------------------------------------------|:----:|:----:|
+| （未使用）        |                                                    |4     |8     |
+| （未使用）        |                                                    |4     |4     |
+| （未使用）        |                                                    |4     |4     |
+| `unsigned flags_` | `kBoolType` (either `kTrueFlag` or `kFalseFlag`)   |4     |4     |
+
+| String              |                                     | 32位 | 64位 |
+|---------------------|-------------------------------------|:----:|:----:|
+| `Ch* str`           | 指向字符串的指针（可能拥有所有权）  |4     |8     |
+| `SizeType length`   | 字符串长度                          |4     |4     |
+| （未使用）          |                                     |4     |4     |
+| `unsigned flags_`   | `kStringType kStringFlag ...`       |4     |4     |
+
+| Object              |                                     | 32位 | 64位 |
+|---------------------|-------------------------------------|:----:|:----:|
+| `Member* members`   | 指向成员数组的指针（拥有所有权）    |4     |8     |
+| `SizeType size`     | 成员数量                            |4     |4     |
+| `SizeType capacity` | 成员容量                            |4     |4     |
+| `unsigned flags_`   | `kObjectType kObjectFlag`           |4     |4     |
+
+| Array               |                                     | 32位 | 64位 |
+|---------------------|-------------------------------------|:----:|:----:|
+| `Value* values`     | 指向值数组的指针（拥有所有权）      |4     |8     |
+| `SizeType size`     | 值数量                              |4     |4     |
+| `SizeType capacity` | 值容量                              |4     |4     |
+| `unsigned flags_`   | `kArrayType kArrayFlag`             |4     |4     |
+
+| Number (Int)        |                                     | 32位 | 64位 |
+|---------------------|-------------------------------------|:----:|:----:|
+| `int i`             | 32位有符号整数                      |4     |4     |
+| （零填充）          | 0                                   |4     |4     |
+| （未使用）          |                                     |4     |8     |
+| `unsigned flags_`   | `kNumberType kNumberFlag kIntFlag kInt64Flag ...` |4     |4     |
+
+| Number (UInt)       |                                     | 32位 | 64位 |
+|---------------------|-------------------------------------|:----:|:----:|
+| `unsigned u`        | 32位无符号整数                      |4     |4     |
+| （零填充）          | 0                                   |4     |4     |
+| （未使用）          |                                     |4     |8     |
+| `unsigned flags_`   | `kNumberType kNumberFlag kUIntFlag kUInt64Flag ...` |4     |4     |
+
+| Number (Int64)      |                                     | 32位 | 64位 |
+|---------------------|-------------------------------------|:----:|:----:|
+| `int64_t i64`       | 64位有符号整数                      |8     |8     |
+| （未使用）          |                                     |4     |8     |
+| `unsigned flags_`   | `kNumberType kNumberFlag kInt64Flag ...`          |4     |4     |
+
+| Number (Uint64)     |                                     | 32位 | 64位 |
+|---------------------|-------------------------------------|:----:|:----:|
+| `uint64_t u64`      | 64位无符号整数                      |8     |8     |
+| （未使用）          |                                     |4     |8     |
+| `unsigned flags_`   | `kNumberType kNumberFlag kUint64Flag ...`         |4     |4     |
+
+| Number (Double)     |                                     | 32位 | 64位 |
+|---------------------|-------------------------------------|:----:|:----:|
+| `double d`          | 双精度浮点数                        |8     |8     |
+| （未使用）          |                                     |4     |8     |
+| `unsigned flags_`   |`kNumberType kNumberFlag kDoubleFlag`|4     |4     |
+
+这里有一些需要注意的地方：
+* 为了减少在64位架构上的内存消耗，`SizeType` 被定义为 `unsigned` 而不是 `size_t`。
+* 32位整数的零填充可能被放在实际类型的前面或后面，这依赖于字节序。这使得它可以将32位整数不经过任何转换就可以解释为64位整数。
+* `Int` 永远是 `Int64`，反之不然。
+
+## 标志 {#Flags}
+
+32位的 `flags_` 包含了 JSON 类型和其他信息。如前文中的表所述，每一种 JSON 类型包含了冗余的 `kXXXType` 和 `kXXXFlag`。这个设计是为了优化测试位标志（`IsNumber()`）和获取每一种类型的序列号（`GetType()`）。
+
+字符串有两个可选的标志。`kCopyFlag` 表明这个字符串拥有字符串拷贝的所有权。而 `kInlineStrFlag` 意味着使用了[短字符串优化](#ShortString)。
+
+数字更加复杂一些。对于普通的整数值，它可以包含 `kIntFlag`、`kUintFlag`、 `kInt64Flag` 和/或 `kUint64Flag`，这由整数的范围决定。带有小数或者超过64位所能表达的范围的整数的数字会被存储为带有 `kDoubleFlag` 的 `double`。
+
+## 短字符串优化 {#ShortString}
+
+[Kosta](https://github.com/Kosta-Github) 提供了很棒的短字符串优化。这个优化的思路如下所述。除去 `flags_` ，`Value` 有12或16字节（对于32位或64位）来存储实际的数据。这为在其内部直接存储短字符串而不是存储字符串的指针创造了可能。对于1字节的字符类型（例如 `char`），它可以在 `Value` 类型内部存储至多11或15个字符的字符串。
+
+|ShortString (Ch=char)|                                     | 32位 | 64位 |
+|---------------------|-------------------------------------|:----:|:----:|
+| `Ch str[MaxChars]`  | 字符串缓冲区                        |11    |15    |
+| `Ch invLength`      | MaxChars - Length                   |1     |1     |
+| `unsigned flags_`   | `kStringType kStringFlag ...`       |4     |4     |
+
+这里使用了一项特殊的技术。它存储了 (MaxChars - length) 而不直接存储字符串的长度。这使得存储11个字符并且带有后缀 `\0` 成为可能。
+
+这个优化可以减少字符串拷贝内存占用。它也改善了缓存一致性，并进一步提高了运行时性能。
+
+# 分配器（Allocator） {#InternalAllocator}
+
+`Allocator` 是 RapidJSON 中的概念：
+~~~cpp
+concept Allocator {
+    static const bool kNeedFree;    //!< 表明这个分配器是否需要调用 Free()。
+
+    // 申请内存块。
+    // \param size 内存块的大小，以字节记。
+    // \returns 指向内存块的指针。
+    void* Malloc(size_t size);
+
+    // 调整内存块的大小。
+    // \param originalPtr 当前内存块的指针。空指针是被允许的。
+    // \param originalSize 当前大小，以字节记。（设计问题：因为有些分配器可能不会记录它，显式地传递它可以节约内存。）
+    // \param newSize 新大小，以字节记。
+    void* Realloc(void* originalPtr, size_t originalSize, size_t newSize);
+
+    // 释放内存块。
+    // \param ptr 指向内存块的指针。空指针是被允许的。
+    static void Free(void *ptr);
+};
+~~~
+
+需要注意的是 `Malloc()` 和 `Realloc()` 是成员函数而 `Free()` 是静态成员函数。
+
+## MemoryPoolAllocator {#MemoryPoolAllocator}
+
+`MemoryPoolAllocator` 是 DOM 的默认内存分配器。它只申请内存而不释放内存。这对于构建 DOM 树非常合适。
+
+在它的内部，它从基础的内存分配器申请内存块（默认为 `CrtAllocator`）并将这些内存块存储为单向链表。当用户请求申请内存，它会遵循下列步骤来申请内存：
+
+1. 如果可用，使用用户提供的缓冲区。（见 [User Buffer section in DOM](doc/dom.md)）
+2. 如果用户提供的缓冲区已满，使用当前内存块。
+3. 如果当前内存块已满，申请新的内存块。
+
+# 解析优化 {#ParsingOptimization}
+
+## 使用 SIMD 跳过空格 {#SkipwhitespaceWithSIMD}
+
+当从流中解析 JSON 时，解析器需要跳过4种空格字符：
+
+1. 空格 (`U+0020`)
+2. 制表符 (`U+0009`)
+3. 换行 (`U+000A`)
+4. 回车 (`U+000D`)
+
+这是一份简单的实现：
+~~~cpp
+void SkipWhitespace(InputStream& s) {
+    while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t')
+        s.Take();
+}
+~~~
+
+但是，这需要对每个字符进行4次比较以及一些分支。这被发现是一个热点。
+
+为了加速这一处理，RapidJSON 使用 SIMD 来在一次迭代中比较16个字符和4个空格。目前 RapidJSON 支持 SSE2 ， SSE4.2 和 ARM Neon 指令。同时它也只会对 UTF-8 内存流启用，包括字符串流或 *原位* 解析。
+
+你可以通过在包含 `rapidjson.h` 之前定义 `RAPIDJSON_SSE2` ， `RAPIDJSON_SSE42` 或 `RAPIDJSON_NEON` 来启用这个优化。一些编译器可以检测这个设置，如 `perftest.h`：
+
+~~~cpp
+// __SSE2__ 和 __SSE4_2__ 可被 gcc、clang 和 Intel 编译器识别：
+// 如果支持的话，我们在 gmake 中使用了 -march=native 来启用 -msse2 和 -msse4.2
+// 同样的， __ARM_NEON 被用于识别Neon
+#if defined(__SSE4_2__)
+#  define RAPIDJSON_SSE42
+#elif defined(__SSE2__)
+#  define RAPIDJSON_SSE2
+#elif defined(__ARM_NEON)
+#  define RAPIDJSON_NEON
+#endif
+~~~
+
+需要注意的是，这是编译期的设置。在不支持这些指令的机器上运行可执行文件会使它崩溃。
+
+### 页面对齐问题
+
+在 RapidJSON 的早期版本中，被报告了[一个问题](https://code.google.com/archive/p/rapidjson/issues/104)：`SkipWhitespace_SIMD()` 会罕见地导致崩溃（约五十万分之一的几率）。在调查之后，怀疑是 `_mm_loadu_si128()` 访问了 `'\0'` 之后的内存，并越过被保护的页面边界。
+
+在 [Intel® 64 and IA-32 Architectures Optimization Reference Manual
+](http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-optimization-manual.html) 中，章节 10.2.1：
+
+> 为了支持需要非对齐的128位 SIMD 内存访问的算法，调用者的内存缓冲区申请应当考虑添加一些填充空间，这样被调用的函数可以安全地将地址指针用于未对齐的128位 SIMD 内存操作。
+> 在结合非对齐的 SIMD 内存操作中，最小的对齐大小应该等于 SIMD 寄存器的大小。
+
+对于 RapidJSON 来说，这显然是不可行的，因为 RapidJSON 不应当强迫用户进行内存对齐。
+
+为了修复这个问题，当前的代码会先按字节处理直到下一个对齐的地址。在这之后，使用对齐读取来进行 SIMD 处理。见 [#85](https://github.com/miloyip/rapidjson/issues/85)。
+
+## 局部流拷贝 {#LocalStreamCopy}
+
+在优化的过程中，我们发现一些编译器不能将访问流的一些成员数据放入局部变量或者寄存器中。测试结果显示，对于一些流类型，创建流的拷贝并将其用于内层循环中可以改善性能。例如，实际（非 SIMD）的 `SkipWhitespace()` 被实现为：
+
+~~~cpp
+template<typename InputStream>
+void SkipWhitespace(InputStream& is) {
+    internal::StreamLocalCopy<InputStream> copy(is);
+    InputStream& s(copy.s);
+
+    while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t')
+        s.Take();
+}
+~~~
+
+基于流的特征，`StreamLocalCopy` 会创建（或不创建）流对象的拷贝，在局部使用它并将流的状态拷贝回原来的流。
+
+## 解析为双精度浮点数 {#ParsingDouble}
+
+将字符串解析为 `double` 并不简单。标准库函数 `strtod()` 可以胜任这项工作，但它比较缓慢。默认情况下，解析器使用常规的精度设置。这最多有 3 [ULP](http://en.wikipedia.org/wiki/Unit_in_the_last_place) 的误差，并实现在 `internal::StrtodNormalPrecision()` 中。
+
+当使用 `kParseFullPrecisionFlag` 时，解析器会改为调用 `internal::StrtodFullPrecision()` ，这个函数会自动调用三个版本的转换。
+1. [Fast-Path](http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/)。
+2. [double-conversion](https://github.com/floitsch/double-conversion) 中的自定义 DIY-FP 实现。
+3. （Clinger, William D. How to read floating point numbers accurately. Vol. 25. No. 6. ACM, 1990） 中的大整数算法。
+
+如果第一个转换方法失败，则尝试使用第二种方法，以此类推。
+
+# 生成优化 {#GenerationOptimization}
+
+## 整数到字符串的转换 {#itoa}
+
+整数到字符串转换的朴素算法需要对每一个十进制位进行一次除法。我们实现了若干版本并在 [itoa-benchmark](https://github.com/miloyip/itoa-benchmark) 中对它们进行了评估。
+
+虽然 SSE2 版本是最快的，但它和第二快的 `branchlut` 差距不大。而且 `branchlut` 是纯C++实现，所以我们在 RapidJSON 中使用了 `branchlut`。
+
+## 双精度浮点数到字符串的转换 {#dtoa}
+
+原来 RapidJSON 使用 `snprintf(..., ..., "%g")` 来进行双精度浮点数到字符串的转换。这是不准确的，因为默认的精度是6。随后我们发现它很缓慢，而且有其它的替代品。
+
+Google 的 V8 [double-conversion](https://github.com/floitsch/double-conversion
+) 实现了更新的、快速的被称为 Grisu3 的算法（Loitsch, Florian. "Printing floating-point numbers quickly and accurately with integers." ACM Sigplan Notices 45.6 (2010): 233-243.）。
+
+然而，这个实现不是仅头文件的，所以我们实现了一个仅头文件的 Grisu2 版本。这个算法保证了结果永远精确。而且在大多数情况下，它会生成最短的（最优）字符串表示。
+
+这个仅头文件的转换函数在 [dtoa-benchmark](https://github.com/miloyip/dtoa-benchmark) 中进行评估。
+
+# 解析器 {#Parser}
+
+## 迭代解析 {#IterativeParser}
+
+迭代解析器是一个以非递归方式实现的递归下降的 LL(1) 解析器。
+
+### 语法 {#IterativeParserGrammar}
+
+解析器使用的语法是基于严格 JSON 语法的：
+~~~~~~~~~~
+S -> array | object
+array -> [ values ]
+object -> { members }
+values -> non-empty-values | ε
+non-empty-values -> value addition-values
+addition-values -> ε | , non-empty-values
+members -> non-empty-members | ε
+non-empty-members -> member addition-members
+addition-members -> ε | , non-empty-members
+member -> STRING : value
+value -> STRING | NUMBER | NULL | BOOLEAN | object | array
+~~~~~~~~~~
+
+注意，对非终结符 `values` 和 `members` 应用了提取左因子（left factoring），以保证语法是 LL(1) 的。
+
+### 解析表 {#IterativeParserParsingTable}
+
+基于这份语法，我们可以构造 FIRST 和 FOLLOW 集合。
+
+非终结符的 FIRST 集合如下所示：
+
+|    NON-TERMINAL   |               FIRST              |
+|:-----------------:|:--------------------------------:|
+|       array       |                 [                |
+|       object      |                 {                |
+|       values      | ε STRING NUMBER NULL BOOLEAN { [ |
+|  addition-values  |              ε COMMA             |
+|      members      |             ε STRING             |
+|  addition-members |              ε COMMA             |
+|       member      |              STRING              |
+|       value       |  STRING NUMBER NULL BOOLEAN { [  |
+|         S         |                [ {               |
+| non-empty-members |              STRING              |
+|  non-empty-values |  STRING NUMBER NULL BOOLEAN { [  |
+
+FOLLOW 集合如下所示：
+
+|    NON-TERMINAL   |  FOLLOW |
+|:-----------------:|:-------:|
+|         S         |    $    |
+|       array       | , $ } ] |
+|       object      | , $ } ] |
+|       values      |    ]    |
+|  non-empty-values |    ]    |
+|  addition-values  |    ]    |
+|      members      |    }    |
+| non-empty-members |    }    |
+|  addition-members |    }    |
+|       member      |   , }   |
+|       value       |  , } ]  |
+
+最终可以从 FIRST 和 FOLLOW 集合生成解析表：
+
+|    NON-TERMINAL   |           [           |           {           |          ,          | : | ] | } |          STRING         |         NUMBER        |          NULL         |        BOOLEAN        |
+|:-----------------:|:---------------------:|:---------------------:|:-------------------:|:-:|:-:|:-:|:-----------------------:|:---------------------:|:---------------------:|:---------------------:|
+|         S         |         array         |         object        |                     |   |   |   |                         |                       |                       |                       |
+|       array       |       [ values ]      |                       |                     |   |   |   |                         |                       |                       |                       |
+|       object      |                       |      { members }      |                     |   |   |   |                         |                       |                       |                       |
+|       values      |    non-empty-values   |    non-empty-values   |                     |   | ε |   |     non-empty-values    |    non-empty-values   |    non-empty-values   |    non-empty-values   |
+|  non-empty-values | value addition-values | value addition-values |                     |   |   |   |  value addition-values  | value addition-values | value addition-values | value addition-values |
+|  addition-values  |                       |                       |  , non-empty-values |   | ε |   |                         |                       |                       |                       |
+|      members      |                       |                       |                     |   |   | ε |    non-empty-members    |                       |                       |                       |
+| non-empty-members |                       |                       |                     |   |   |   | member addition-members |                       |                       |                       |
+|  addition-members |                       |                       | , non-empty-members |   |   | ε |                         |                       |                       |                       |
+|       member      |                       |                       |                     |   |   |   |      STRING : value     |                       |                       |                       |
+|       value       |         array         |         object        |                     |   |   |   |          STRING         |         NUMBER        |          NULL         |        BOOLEAN        |
+
+对于上面的语法分析，这里有一个很棒的[工具](http://hackingoff.com/compilers/predict-first-follow-set)。
+
+### 实现 {#IterativeParserImplementation}
+
+基于这份解析表，一个直接的（常规的）将规则反向入栈的实现可以正常工作。
+
+在 RapidJSON 中，对直接的实现进行了一些修改：
+
+首先，在 RapidJSON 中，这份解析表被编码为状态机。
+规则由头部和主体组成。
+状态转换由规则构造。
+除此之外，额外的状态被添加到与 `array` 和 `object` 有关的规则。
+通过这种方式，生成数组值或对象成员可以只用一次状态转移便可完成，
+而不需要在直接的实现中的多次出栈/入栈操作。
+这也使得估计栈的大小更加容易。
+
+状态图如下所示：
+
+![状态图](diagram/iterative-parser-states-diagram.png)
+
+第二，迭代解析器也在内部栈保存了数组的值个数和对象成员的数量，这也与传统的实现不同。
diff --git a/doc/sax.md b/doc/sax.md
index 1d4fc2ae5..486788071 100644
--- a/doc/sax.md
+++ b/doc/sax.md
@@ -8,7 +8,7 @@ In RapidJSON, `Reader` (typedef of `GenericReader<...>`) is the SAX-style parser
 
 # Reader {#Reader}
 
-`Reader` parses a JSON from a stream. While it reads characters from the stream, it analyze the characters according to the syntax of JSON, and publish events to a handler.
+`Reader` parses a JSON from a stream. While it reads characters from the stream, it analyzes the characters according to the syntax of JSON, and publishes events to a handler.
 
 For example, here is a JSON.
 
@@ -24,7 +24,7 @@ For example, here is a JSON.
 }
 ~~~~~~~~~~
 
-While a `Reader` parses this JSON, it publishes the following events to the handler sequentially:
+When a `Reader` parses this JSON, it publishes the following events to the handler sequentially:
 
 ~~~~~~~~~~
 StartObject()
@@ -50,7 +50,7 @@ EndArray(4)
 EndObject(7)
 ~~~~~~~~~~
 
-These events can be easily matched with the JSON, except some event parameters need further explanation. Let's see the `simplereader` example which produces exactly the same output as above:
+These events can be easily matched with the JSON, but some event parameters need further explanation. Let's see the `simplereader` example which produces exactly the same output as above:
 
 ~~~~~~~~~~cpp
 #include "rapidjson/reader.h"
@@ -91,11 +91,11 @@ void main() {
 }
 ~~~~~~~~~~
 
-Note that, RapidJSON uses template to statically bind the `Reader` type and the handler type, instead of using class with virtual functions. This paradigm can improve the performance by inlining functions.
+Note that RapidJSON uses templates to statically bind the `Reader` type and the handler type, instead of using classes with virtual functions. This paradigm can improve performance by inlining functions.
 
 ## Handler {#Handler}
 
-As the previous example showed, user needs to implement a handler, which consumes the events (function calls) from `Reader`. The handler must contain the following member functions.
+As shown in the previous example, the user needs to implement a handler which consumes the events (via function calls) from the `Reader`. The handler must contain the following member functions.
 
 ~~~~~~~~~~cpp
 class Handler {
@@ -122,15 +122,15 @@ class Handler {
 
 When the `Reader` encounters a JSON number, it chooses a suitable C++ type mapping. And then it calls *one* function out of `Int(int)`, `Uint(unsigned)`, `Int64(int64_t)`, `Uint64(uint64_t)` and `Double(double)`. If `kParseNumbersAsStrings` is enabled, `Reader` will always calls `RawNumber()` instead.
 
-`String(const char* str, SizeType length, bool copy)` is called when the `Reader` encounters a string. The first parameter is pointer to the string. The second parameter is the length of the string (excluding the null terminator). Note that RapidJSON supports null character `'\0'` inside a string. If such situation happens, `strlen(str) < length`. The last `copy` indicates whether the handler needs to make a copy of the string. For normal parsing, `copy = true`. Only when *insitu* parsing is used, `copy = false`. And beware that, the character type depends on the target encoding, which will be explained later.
+`String(const char* str, SizeType length, bool copy)` is called when the `Reader` encounters a string. The first parameter is a pointer to the string. The second parameter is the length of the string (excluding the null terminator). Note that RapidJSON supports the null character `\0` inside a string. If such a situation happens, `strlen(str) < length`. The last `copy` indicates whether the handler needs to make a copy of the string. For normal parsing, `copy = true`. Only when *insitu* parsing is used, `copy = false`. And be aware that the character type depends on the target encoding, which will be explained later.
 
-When the `Reader` encounters the beginning of an object, it calls `StartObject()`. An object in JSON is a set of name-value pairs. If the object contains members it first calls `Key()` for the name of member, and then calls functions depending on the type of the value. These calls of name-value pairs repeats until calling `EndObject(SizeType memberCount)`. Note that the `memberCount` parameter is just an aid for the handler, user may not need this parameter.
+When the `Reader` encounters the beginning of an object, it calls `StartObject()`. An object in JSON is a set of name-value pairs. If the object contains members it first calls `Key()` for the name of member, and then calls functions depending on the type of the value. These calls of name-value pairs repeat until calling `EndObject(SizeType memberCount)`. Note that the `memberCount` parameter is just an aid for the handler; users who do not need this parameter may ignore it.
 
-Array is similar to object but simpler. At the beginning of an array, the `Reader` calls `BeginArary()`. If there is elements, it calls functions according to the types of element. Similarly, in the last call `EndArray(SizeType elementCount)`, the parameter `elementCount` is just an aid for the handler.
+Arrays are similar to objects, but simpler. At the beginning of an array, the `Reader` calls `StartArray()`. If there are elements, it calls functions according to the types of the elements. Similarly, in the last call `EndArray(SizeType elementCount)`, the parameter `elementCount` is just an aid for the handler.
 
-Every handler functions returns a `bool`. Normally it should returns `true`. If the handler encounters an error, it can return `false` to notify event publisher to stop further processing.
+Every handler function returns a `bool`. Normally it should return `true`. If the handler encounters an error, it can return `false` to notify the event publisher to stop further processing.
 
-For example, when we parse a JSON with `Reader` and the handler detected that the JSON does not conform to the required schema, then the handler can return `false` and let the `Reader` stop further parsing. And the `Reader` will be in error state with error code `kParseErrorTermination`.
+For example, when we parse a JSON with `Reader` and the handler detects that the JSON does not conform to the required schema, the handler can return `false` and let the `Reader` stop further parsing. This will place the `Reader` in an error state, with error code `kParseErrorTermination`.
 
 ## GenericReader {#GenericReader}
 
@@ -149,19 +149,19 @@ typedef GenericReader<UTF8<>, UTF8<> > Reader;
 } // namespace rapidjson
 ~~~~~~~~~~
 
-The `Reader` uses UTF-8 as both source and target encoding. The source encoding means the encoding in the JSON stream. The target encoding means the encoding of the `str` parameter in `String()` calls. For example, to parse a UTF-8 stream and outputs UTF-16 string events, you can define a reader by:
+The `Reader` uses UTF-8 as both source and target encoding. The source encoding means the encoding in the JSON stream. The target encoding means the encoding of the `str` parameter in `String()` calls. For example, to parse a UTF-8 stream and output UTF-16 string events, you can define a reader by:
 
 ~~~~~~~~~~cpp
 GenericReader<UTF8<>, UTF16<> > reader;
 ~~~~~~~~~~
 
-Note that, the default character type of `UTF16` is `wchar_t`. So this `reader`needs to call `String(const wchar_t*, SizeType, bool)` of the handler.
+Note that, the default character type of `UTF16` is `wchar_t`. So this `reader` needs to call `String(const wchar_t*, SizeType, bool)` of the handler.
 
 The third template parameter `Allocator` is the allocator type for internal data structure (actually a stack).
 
 ## Parsing {#SaxParsing}
 
-The one and only one function of `Reader` is to parse JSON. 
+The main function of `Reader` is used to parse JSON. 
 
 ~~~~~~~~~~cpp
 template <unsigned parseFlags, typename InputStream, typename Handler>
@@ -172,7 +172,30 @@ template <typename InputStream, typename Handler>
 bool Parse(InputStream& is, Handler& handler);
 ~~~~~~~~~~
 
-If an error occurs during parsing, it will return `false`. User can also calls `bool HasParseEror()`, `ParseErrorCode GetParseErrorCode()` and `size_t GetErrorOffset()` to obtain the error states. Actually `Document` uses these `Reader` functions to obtain parse errors. Please refer to [DOM](doc/dom.md) for details about parse error.
+If an error occurs during parsing, it will return `false`. User can also call `bool HasParseError()`, `ParseErrorCode GetParseErrorCode()` and `size_t GetErrorOffset()` to obtain the error states. In fact, `Document` uses these `Reader` functions to obtain parse errors. Please refer to [DOM](doc/dom.md) for details about parse errors.
+
+## Token-by-Token Parsing {#TokenByTokenParsing}
+
+Some users may wish to parse a JSON input stream a single token at a time, instead of immediately parsing an entire document without stopping. To parse JSON this way, instead of calling `Parse`, you can use the `IterativeParse` set of functions:
+
+~~~~~~~~~~cpp
+    void IterativeParseInit();
+	
+    template <unsigned parseFlags, typename InputStream, typename Handler>
+    bool IterativeParseNext(InputStream& is, Handler& handler);
+
+    bool IterativeParseComplete();
+~~~~~~~~~~
+
+Here is an example of iteratively parsing JSON, token by token:
+
+~~~~~~~~~~cpp
+    reader.IterativeParseInit();
+    while (!reader.IterativeParseComplete()) {
+        reader.IterativeParseNext<kParseDefaultFlags>(is, handler);
+        // Your handler has been called once.
+    }
+~~~~~~~~~~
 
 # Writer {#Writer}
 
diff --git a/doc/sax.zh-cn.md b/doc/sax.zh-cn.md
index b20286de9..740c339fa 100644
--- a/doc/sax.zh-cn.md
+++ b/doc/sax.zh-cn.md
@@ -122,7 +122,7 @@ class Handler {
 
 当 `Reader` 遇到 JSON number，它会选择一个合适的 C++ 类型映射，然后调用 `Int(int)`、`Uint(unsigned)`、`Int64(int64_t)`、`Uint64(uint64_t)` 及 `Double(double)` 的 * 其中之一个 *。 若开启了 `kParseNumbersAsStrings` 选项，`Reader` 便会改为调用 `RawNumber()`。
 
-当 `Reader` 遇到 JSON string，它会调用 `String(const char* str, SizeType length, bool copy)`。第一个参数是字符串的指针。第二个参数是字符串的长度（不包含空终止符号）。注意 RapidJSON 支持字串中含有空字符 `'\0'`。若出现这种情况，便会有 `strlen(str) < length`。最后的 `copy` 参数表示处理器是否需要复制该字符串。在正常解析时，`copy = true`。仅当使用原位解析时，`copy = false`。此外，还要注意字符的类型与目标编码相关，我们稍后会再谈这一点。
+当 `Reader` 遇到 JSON string，它会调用 `String(const char* str, SizeType length, bool copy)`。第一个参数是字符串的指针。第二个参数是字符串的长度（不包含空终止符号）。注意 RapidJSON 支持字串中含有空字符 `\0`。若出现这种情况，便会有 `strlen(str) < length`。最后的 `copy` 参数表示处理器是否需要复制该字符串。在正常解析时，`copy = true`。仅当使用原位解析时，`copy = false`。此外，还要注意字符的类型与目标编码相关，我们稍后会再谈这一点。
 
 当 `Reader` 遇到 JSON object 的开始之时，它会调用 `StartObject()`。JSON 的 object 是一个键值对（成员）的集合。若 object 包含成员，它会先为成员的名字调用 `Key()`，然后再按值的类型调用函数。它不断调用这些键值对，直至最终调用 `EndObject(SizeType memberCount)`。注意 `memberCount` 参数对处理器来说只是协助性质，使用者可能不需要此参数。
 
diff --git a/doc/schema.md b/doc/schema.md
index 8b4195b75..29ba4f545 100644
--- a/doc/schema.md
+++ b/doc/schema.md
@@ -20,7 +20,7 @@ Secondly, construct a `SchemaValidator` with the `SchemaDocument`. It is similar
 // ...
 
 Document sd;
-if (!sd.Parse(schemaJson).HasParseError()) {
+if (sd.Parse(schemaJson).HasParseError()) {
     // the schema is not a valid JSON.
     // ...       
 }
@@ -28,7 +28,7 @@ SchemaDocument schema(sd); // Compile a Document to SchemaDocument
 // sd is no longer needed here.
 
 Document d;
-if (!d.Parse(inputJson).HasParseError()) {
+if (d.Parse(inputJson).HasParseError()) {
     // the input is not a valid JSON.
     // ...       
 }
diff --git a/doc/schema.zh-cn.md b/doc/schema.zh-cn.md
index fa076de85..5df1f312f 100644
--- a/doc/schema.zh-cn.md
+++ b/doc/schema.zh-cn.md
@@ -20,7 +20,7 @@ RapidJSON 实现了一个 [JSON Schema Draft v4](http://json-schema.org/document
 // ...
 
 Document sd;
-if (!sd.Parse(schemaJson).HasParseError()) {
+if (sd.Parse(schemaJson).HasParseError()) {
     // 此 schema 不是合法的 JSON
     // ...       
 }
@@ -28,7 +28,7 @@ SchemaDocument schema(sd); // 把一个 Document 编译至 SchemaDocument
 // 之后不再需要 sd
 
 Document d;
-if (!d.Parse(inputJson).HasParseError()) {
+if (d.Parse(inputJson).HasParseError()) {
     // 输入不是一个合法的 JSON
     // ...       
 }
diff --git a/doc/tutorial.md b/doc/tutorial.md
index cb76b4b0b..167b81dd7 100644
--- a/doc/tutorial.md
+++ b/doc/tutorial.md
@@ -2,7 +2,7 @@
 
 This tutorial introduces the basics of the Document Object Model(DOM) API.
 
-As shown in [Usage at a glance](@ref index), a JSON can be parsed into DOM, and then the DOM can be queried and modified easily, and finally be converted back to JSON.
+As shown in [Usage at a glance](@ref index), JSON can be parsed into a DOM, and then the DOM can be queried and modified easily, and finally be converted back to JSON.
 
 [TOC]
 
@@ -14,7 +14,7 @@ Each JSON value is stored in a type called `Value`. A `Document`, representing t
 
 In this section, we will use excerpt of `example/tutorial/tutorial.cpp`.
 
-Assumes we have a JSON stored in a C string (`const char* json`):
+Assume we have the following JSON stored in a C string (`const char* json`):
 ~~~~~~~~~~js
 {
     "hello": "world",
@@ -55,7 +55,7 @@ printf("hello = %s\n", document["hello"].GetString());
 ~~~~~~~~~~
 
 ~~~~~~~~~~
-world
+hello = world
 ~~~~~~~~~~
 
 JSON true/false values are represented as `bool`.
@@ -65,16 +65,16 @@ printf("t = %s\n", document["t"].GetBool() ? "true" : "false");
 ~~~~~~~~~~
 
 ~~~~~~~~~~
-true
+t = true
 ~~~~~~~~~~
 
-JSON null can be queryed by `IsNull()`.
+JSON null can be queried with `IsNull()`.
 ~~~~~~~~~~cpp
 printf("n = %s\n", document["n"].IsNull() ? "null" : "?");
 ~~~~~~~~~~
 
 ~~~~~~~~~~
-null
+n = null
 ~~~~~~~~~~
 
 JSON number type represents all numeric values. However, C++ needs more specific type for manipulation.
@@ -115,15 +115,15 @@ a[3] = 4
 
 Note that, RapidJSON does not automatically convert values between JSON types. If a value is a string, it is invalid to call `GetInt()`, for example. In debug mode it will fail an assertion. In release mode, the behavior is undefined.
 
-In the following, details about querying individual types are discussed.
+In the following sections we discuss details about querying individual types.
 
 ## Query Array {#QueryArray}
 
-By default, `SizeType` is typedef of `unsigned`. In most systems, array is limited to store up to 2^32-1 elements.
+By default, `SizeType` is typedef of `unsigned`. In most systems, an array is limited to store up to 2^32-1 elements.
 
-You may access the elements in array by integer literal, for example, `a[0]`, `a[1]`, `a[2]`.
+You may access the elements in an array by integer literal, for example, `a[0]`, `a[1]`, `a[2]`.
 
-Array is similar to `std::vector`, instead of using indices, you may also use iterator to access all the elements.
+Array is similar to `std::vector`: instead of using indices, you may also use iterator to access all the elements.
 ~~~~~~~~~~cpp
 for (Value::ConstValueIterator itr = a.Begin(); itr != a.End(); ++itr)
     printf("%d ", itr->GetInt());
@@ -144,7 +144,7 @@ for (auto& v : a.GetArray())
 
 ## Query Object {#QueryObject}
 
-Similar to array, we can access all object members by iterator:
+Similar to Array, we can access all object members by iterator:
 
 ~~~~~~~~~~cpp
 static const char* kTypeNames[] = 
@@ -190,11 +190,11 @@ for (auto& m : document.GetObject())
 
 ## Querying Number {#QueryNumber}
 
-JSON provide a single numerical type called Number. Number can be integer or real numbers. RFC 4627 says the range of Number is specified by parser.
+JSON provides a single numerical type called Number. Number can be an integer or a real number. RFC 4627 says the range of Number is specified by the parser implementation.
 
-As C++ provides several integer and floating point number types, the DOM tries to handle these with widest possible range and good performance.
+As C++ provides several integer and floating point number types, the DOM tries to handle these with the widest possible range and good performance.
 
-When a Number is parsed, it is stored in the DOM as either one of the following type:
+When a Number is parsed, it is stored in the DOM as one of the following types:
 
 Type       | Description
 -----------|---------------------------------------
@@ -204,7 +204,7 @@ Type       | Description
 `int64_t`  | 64-bit signed integer
 `double`   | 64-bit double precision floating point
 
-When querying a number, you can check whether the number can be obtained as target type:
+When querying a number, you can check whether the number can be obtained as the target type:
 
 Checking          | Obtaining
 ------------------|---------------------
@@ -215,9 +215,9 @@ Checking          | Obtaining
 `bool IsInt64()`  | `int64_t GetInt64()`
 `bool IsDouble()` | `double GetDouble()`
 
-Note that, an integer value may be obtained in various ways without conversion. For example, A value `x` containing 123 will make `x.IsInt() == x.IsUint() == x.IsInt64() == x.IsUint64() == true`. But a value `y` containing -3000000000 will only makes `x.IsInt64() == true`.
+Note that, an integer value may be obtained in various ways without conversion. For example, a value `x` containing 123 will make `x.IsInt() == x.IsUint() == x.IsInt64() == x.IsUint64() == true`. But a value `y` containing -3000000000 will only make `y.IsInt64() == true`.
 
-When obtaining the numeric values, `GetDouble()` will convert internal integer representation to a `double`. Note that, `int` and `unsigned` can be safely convert to `double`, but `int64_t` and `uint64_t` may lose precision (since mantissa of `double` is only 52-bits).
+When obtaining the numeric values, `GetDouble()` will convert internal integer representation to a `double`. Note that, `int` and `unsigned` can be safely converted to `double`, but `int64_t` and `uint64_t` may lose precision (since mantissa of `double` is only 52-bits).
 
 ## Query String {#QueryString}
 
@@ -225,7 +225,7 @@ In addition to `GetString()`, the `Value` class also contains `GetStringLength()
 
 According to RFC 4627, JSON strings can contain Unicode character `U+0000`, which must be escaped as `"\u0000"`. The problem is that, C/C++ often uses null-terminated string, which treats ``\0'` as the terminator symbol.
 
-To conform RFC 4627, RapidJSON supports string containing `U+0000`. If you need to handle this, you can use `GetStringLength()` API to obtain the correct length of string.
+To conform to RFC 4627, RapidJSON supports strings containing `U+0000`. If you need to handle this, you can use `GetStringLength()` to obtain the correct string length.
 
 For example, after parsing a the following JSON to `Document d`:
 
@@ -360,14 +360,14 @@ a.PushBack(Value(42).Move(), allocator);   // same as above
 ~~~~~~~~~~
 
 ## Create String {#CreateString}
-RapidJSON provide two strategies for storing string.
+RapidJSON provides two strategies for storing string.
 
 1. copy-string: allocates a buffer, and then copy the source data into it.
 2. const-string: simply store a pointer of string.
 
-Copy-string is always safe because it owns a copy of the data. Const-string can be used for storing string literal, and in-situ parsing which we will mentioned in Document section.
+Copy-string is always safe because it owns a copy of the data. Const-string can be used for storing a string literal, and for in-situ parsing which will be mentioned in the DOM section.
 
-To make memory allocation customizable, RapidJSON requires user to pass an instance of allocator, whenever an operation may require allocation. This design is needed to prevent storing a allocator (or Document) pointer per Value.
+To make memory allocation customizable, RapidJSON requires users to pass an instance of allocator, whenever an operation may require allocation. This design is needed to prevent storing an allocator (or Document) pointer per Value.
 
 Therefore, when we assign a copy-string, we call this overloaded `SetString()` with allocator:
 
@@ -385,7 +385,7 @@ In this example, we get the allocator from a `Document` instance. This is a comm
 
 Besides, the above `SetString()` requires length. This can handle null characters within a string. There is another `SetString()` overloaded function without the length parameter. And it assumes the input is null-terminated and calls a `strlen()`-like function to obtain the length.
 
-Finally, for string literal or string with safe life-cycle can use const-string version of `SetString()`, which lacks allocator parameter.  For string literals (or constant character arrays), simply passing the literal as parameter is safe and efficient:
+Finally, for a string literal or string with a safe life-cycle one can use the const-string version of `SetString()`, which lacks an allocator parameter.  For string literals (or constant character arrays), simply passing the literal as parameter is safe and efficient:
 
 ~~~~~~~~~~cpp
 Value s;
@@ -393,7 +393,7 @@ s.SetString("rapidjson");    // can contain null character, length derived at co
 s = "rapidjson";             // shortcut, same as above
 ~~~~~~~~~~
 
-For character pointer, the RapidJSON requires to mark it as safe before using it without copying. This can be achieved by using the `StringRef` function:
+For a character pointer, RapidJSON requires it to be marked as safe before using it without copying. This can be achieved by using the `StringRef` function:
 
 ~~~~~~~~~cpp
 const char * cstr = getenv("USER");
@@ -408,7 +408,7 @@ s = StringRef(cstr,cstr_len);          // shortcut, same as above
 ~~~~~~~~~
 
 ## Modify Array {#ModifyArray}
-Value with array type provides similar APIs as `std::vector`.
+Value with array type provides an API similar to `std::vector`.
 
 * `Clear()`
 * `Reserve(SizeType, Allocator&)`
@@ -418,7 +418,7 @@ Value with array type provides similar APIs as `std::vector`.
 * `ValueIterator Erase(ConstValueIterator pos)`
 * `ValueIterator Erase(ConstValueIterator first, ConstValueIterator last)`
 
-Note that, `Reserve(...)` and `PushBack(...)` may allocate memory for the array elements, therefore require an allocator.
+Note that, `Reserve(...)` and `PushBack(...)` may allocate memory for the array elements, therefore requiring an allocator.
 
 Here is an example of `PushBack()`:
 
@@ -433,7 +433,7 @@ for (int i = 5; i <= 10; i++)
 a.PushBack("Lua", allocator).PushBack("Mio", allocator);
 ~~~~~~~~~~
 
-Differs from STL, `PushBack()`/`PopBack()` returns the array reference itself. This is called _fluent interface_.
+This API differs from STL in that `PushBack()`/`PopBack()` return the array reference itself. This is called _fluent interface_.
 
 If you want to add a non-constant string or a string without sufficient lifetime (see [Create String](#CreateString)) to the array, you need to create a string Value by using the copy-string API.  To avoid the need for an intermediate variable, you can use a [temporary value](#TemporaryValues) in place:
 
@@ -448,7 +448,7 @@ contact.PushBack(val, document.GetAllocator());
 ~~~~~~~~~~
 
 ## Modify Object {#ModifyObject}
-Object is a collection of key-value pairs (members). Each key must be a string value. To modify an object, either add or remove members. THe following APIs are for adding members:
+The Object class is a collection of key-value pairs (members). Each key must be a string value. To modify an object, either add or remove members. The following API is for adding members:
 
 * `Value& AddMember(Value&, Value&, Allocator& allocator)`
 * `Value& AddMember(StringRefType, Value&, Allocator&)`
@@ -462,7 +462,7 @@ contact.AddMember("name", "Milo", document.GetAllocator());
 contact.AddMember("married", true, document.GetAllocator());
 ~~~~~~~~~~
 
-The name parameter with `StringRefType` is similar to the interface of `SetString` function for string values. These overloads are used to avoid the need for copying the `name` string, as constant key names are very common in JSON objects.
+The name parameter with `StringRefType` is similar to the interface of the `SetString` function for string values. These overloads are used to avoid the need for copying the `name` string, since constant key names are very common in JSON objects.
 
 If you need to create a name from a non-constant string or a string without sufficient lifetime (see [Create String](#CreateString)), you need to create a string Value by using the copy-string API.  To avoid the need for an intermediate variable, you can use a [temporary value](#TemporaryValues) in place:
 
diff --git a/doc/tutorial.zh-cn.md b/doc/tutorial.zh-cn.md
index ec1315c8f..6b2588f7e 100644
--- a/doc/tutorial.zh-cn.md
+++ b/doc/tutorial.zh-cn.md
@@ -343,7 +343,7 @@ Value o(kObjectType);
 
 ![转移语义不需复制。](diagram/move3.png)
 
-在 C++11 中这称为转移赋值操作（move assignment operator）。由于 RapidJSON 支持 C++03，它在赋值操作采用转移语义，其它修改形函数如 `AddMember()`, `PushBack()` 也采用转移语义。
+在 C++11 中这称为转移赋值操作（move assignment operator）。由于 RapidJSON 支持 C++03，它在赋值操作采用转移语义，其它修改型函数如 `AddMember()`, `PushBack()` 也采用转移语义。
 
 ### 转移语义及临时值 {#TemporaryValues}
 
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
index 4d448ccc0..e00f77aab 100644
--- a/example/CMakeLists.txt
+++ b/example/CMakeLists.txt
@@ -10,6 +10,7 @@ set(EXAMPLES
     filterkey
     filterkeydom
     jsonx
+    lookaheadparser
     messagereader
     parsebyparts
     pretty
@@ -18,17 +19,17 @@ set(EXAMPLES
     serialize
     simpledom
     simplereader
+    simplepullreader
     simplewriter
     tutorial)
     
 include_directories("../include/")
 
 add_definitions(-D__STDC_FORMAT_MACROS)
+set_property(DIRECTORY PROPERTY COMPILE_OPTIONS ${EXTRA_CXX_FLAGS})
 
 if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Werror -Wall -Wextra -Weffc++ -Wswitch-default")
-elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall -Wextra -Weffc++ -Wswitch-default -Wfloat-equal -Wimplicit-fallthrough -Weverything")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
 endif()
 
 foreach (example ${EXAMPLES})
diff --git a/example/lookaheadparser/lookaheadparser.cpp b/example/lookaheadparser/lookaheadparser.cpp
new file mode 100644
index 000000000..f627f4d86
--- /dev/null
+++ b/example/lookaheadparser/lookaheadparser.cpp
@@ -0,0 +1,350 @@
+#include "rapidjson/reader.h"
+#include "rapidjson/document.h"
+#include <iostream>
+
+RAPIDJSON_DIAG_PUSH
+#ifdef __GNUC__
+RAPIDJSON_DIAG_OFF(effc++)
+#endif
+
+// This example demonstrates JSON token-by-token parsing with an API that is
+// more direct; you don't need to design your logic around a handler object and
+// callbacks. Instead, you retrieve values from the JSON stream by calling
+// GetInt(), GetDouble(), GetString() and GetBool(), traverse into structures
+// by calling EnterObject() and EnterArray(), and skip over unwanted data by
+// calling SkipValue(). When you know your JSON's structure, this can be quite
+// convenient.
+//
+// If you aren't sure of what's next in the JSON data, you can use PeekType() and
+// PeekValue() to look ahead to the next object before reading it.
+//
+// If you call the wrong retrieval method--e.g. GetInt when the next JSON token is
+// not an int, EnterObject or EnterArray when there isn't actually an object or array
+// to read--the stream parsing will end immediately and no more data will be delivered.
+//
+// After calling EnterObject, you retrieve keys via NextObjectKey() and values via
+// the normal getters. When NextObjectKey() returns null, you have exited the
+// object, or you can call SkipObject() to skip to the end of the object
+// immediately. If you fetch the entire object (i.e. NextObjectKey() returned null),
+// you should not call SkipObject().
+//
+// After calling EnterArray(), you must alternate between calling NextArrayValue()
+// to see if the array has more data, and then retrieving values via the normal
+// getters. You can call SkipArray() to skip to the end of the array immediately.
+// If you fetch the entire array (i.e. NextArrayValue() returned false),
+// you should not call SkipArray().
+//
+// This parser uses in-situ strings, so the JSON buffer will be altered during the
+// parse.
+
+using namespace rapidjson;
+
+
+class LookaheadParserHandler { // SAX handler: each callback records the token kind in st_ and its payload in v_
+public:
+    bool Null() { st_ = kHasNull; v_.SetNull(); return true; }
+    bool Bool(bool b) { st_ = kHasBool; v_.SetBool(b); return true; }
+    bool Int(int i) { st_ = kHasNumber; v_.SetInt(i); return true; }
+    bool Uint(unsigned u) { st_ = kHasNumber; v_.SetUint(u); return true; }
+    bool Int64(int64_t i) { st_ = kHasNumber; v_.SetInt64(i); return true; }
+    bool Uint64(uint64_t u) { st_ = kHasNumber; v_.SetUint64(u); return true; }
+    bool Double(double d) { st_ = kHasNumber; v_.SetDouble(d); return true; }
+    bool RawNumber(const char*, SizeType, bool) { return false; } // raw-number events are always rejected
+    bool String(const char* str, SizeType length, bool) { st_ = kHasString; v_.SetString(str, length); return true; }
+    bool StartObject() { st_ = kEnteringObject; return true; }
+    bool Key(const char* str, SizeType length, bool) { st_ = kHasKey; v_.SetString(str, length); return true; }
+    bool EndObject(SizeType) { st_ = kExitingObject; return true; }
+    bool StartArray() { st_ = kEnteringArray; return true; }
+    bool EndArray(SizeType) { st_ = kExitingArray; return true; }
+
+protected:
+    LookaheadParserHandler(char* str);
+    void ParseNext();
+
+protected:
+    enum LookaheadParsingState { // PeekValue()/PeekType() test the range kHasNull..kHasKey, so keep those contiguous
+        kInit,
+        kError,
+        kHasNull,
+        kHasBool,
+        kHasNumber,
+        kHasString,
+        kHasKey,
+        kEnteringObject,
+        kExitingObject,
+        kEnteringArray,
+        kExitingArray
+    };
+    
+    Value v_; // payload of the current token (set by the scalar, string and key callbacks)
+    LookaheadParsingState st_; // kind of the current token, or kInit/kError
+    Reader r_;
+    InsituStringStream ss_; // stream over the caller-supplied buffer passed to the constructor
+    
+    static const int parseFlags = kParseDefaultFlags | kParseInsituFlag; // flags used for every IterativeParseNext() call
+};
+
+LookaheadParserHandler::LookaheadParserHandler(char* str) : v_(), st_(kInit), r_(), ss_(str) {
+    r_.IterativeParseInit();
+    ParseNext(); // read the first token right away so it can be peeked before any getter is called
+}
+
+// Pulls the next token into st_/v_; any parse failure, old or new, ends in kError.
+void LookaheadParserHandler::ParseNext() {
+    // IterativeParseNext() returns false when it hits an error. Flag it at once
+    // so callers never observe the previous token's state a second time.
+    if (r_.HasParseError() || !r_.IterativeParseNext<parseFlags>(ss_, *this)) {
+        st_ = kError;
+    }
+}
+
+class LookaheadParser : protected LookaheadParserHandler { // pull-style API built on the handler's one-token lookahead
+public:
+    LookaheadParser(char* str) : LookaheadParserHandler(str) {}
+    
+    bool EnterObject(); // false (and kError) unless the current token starts an object
+    bool EnterArray(); // false (and kError) unless the current token starts an array
+    const char* NextObjectKey(); // next key, or null at the end of the object or on error
+    bool NextArrayValue(); // true while a value is pending; false at the end of the array or on error
+    int GetInt();
+    double GetDouble();
+    const char* GetString();
+    bool GetBool();
+    void GetNull();
+
+    void SkipObject();
+    void SkipArray();
+    void SkipValue();
+    Value* PeekValue(); // current token's value without consuming it, or null if it carries none
+    int PeekType(); // returns a rapidjson::Type, or -1 for no value (at end of object/array)
+    
+    bool IsValid() { return st_ != kError; } // false once the parser has entered kError
+    
+protected:
+    void SkipOut(int depth); // shared implementation of SkipValue/SkipArray/SkipObject
+};
+
+bool LookaheadParser::EnterObject() { // consumes a pending object-start token; any other state becomes kError
+    if (st_ != kEnteringObject) {
+        st_  = kError;
+        return false;
+    }
+    
+    ParseNext();
+    return true;
+}
+
+bool LookaheadParser::EnterArray() { // consumes a pending array-start token; any other state becomes kError
+    if (st_ != kEnteringArray) {
+        st_  = kError;
+        return false;
+    }
+    
+    ParseNext();
+    return true;
+}
+
+const char* LookaheadParser::NextObjectKey() { // returns the pending key and advances to its value; null when the object ends
+    if (st_ == kHasKey) {
+        const char* result = v_.GetString(); // fetch the pointer before ParseNext() replaces the current token
+        ParseNext();
+        return result;
+    }
+    
+    if (st_ != kExitingObject) { // neither a key nor the end of the object: misuse or malformed input
+        st_ = kError;
+        return 0;
+    }
+    
+    ParseNext(); // step past the end-of-object token
+    return 0;
+}
+
+bool LookaheadParser::NextArrayValue() { // reports whether another element is pending; the element itself is not consumed
+    if (st_ == kExitingArray) {
+        ParseNext(); // step past the end-of-array token
+        return false;
+    }
+    
+    if (st_ == kError || st_ == kExitingObject || st_ == kHasKey) { // states that cannot be an array element
+        st_ = kError;
+        return false;
+    }
+
+    return true;
+}
+
+int LookaheadParser::GetInt() { // returns the pending number if v_.IsInt() and advances; otherwise kError and 0
+    if (st_ != kHasNumber || !v_.IsInt()) {
+        st_ = kError;
+        return 0;
+    }
+
+    int result = v_.GetInt();
+    ParseNext();
+    return result;
+}
+
+double LookaheadParser::GetDouble() { // accepts any pending number token and advances; otherwise kError and 0.
+    if (st_ != kHasNumber) {
+        st_  = kError;
+        return 0.;
+    }
+    
+    double result = v_.GetDouble();
+    ParseNext();
+    return result;
+}
+
+bool LookaheadParser::GetBool() { // returns the pending boolean and advances; otherwise kError and false
+    if (st_ != kHasBool) {
+        st_  = kError;
+        return false; // indistinguishable from a real false: check IsValid() to detect the error
+    }
+    
+    bool result = v_.GetBool();
+    ParseNext();
+    return result;
+}
+
+void LookaheadParser::GetNull() { // consumes a pending null token; any other state becomes kError
+    if (st_ != kHasNull) {
+        st_  = kError;
+        return;
+    }
+
+    ParseNext();
+}
+
+const char* LookaheadParser::GetString() { // returns the pending string and advances; otherwise kError and null
+    if (st_ != kHasString) {
+        st_  = kError;
+        return 0;
+    }
+    
+    const char* result = v_.GetString(); // fetch the pointer before ParseNext() replaces the current token
+    ParseNext();
+    return result;
+}
+
+void LookaheadParser::SkipOut(int depth) { // consumes tokens until `depth` enclosing containers have been closed
+    do { // always runs once, so depth 0 skips exactly one scalar or one whole container
+        if (st_ == kEnteringArray || st_ == kEnteringObject) {
+            ++depth;
+        }
+        else if (st_ == kExitingArray || st_ == kExitingObject) {
+            --depth;
+        }
+        else if (st_ == kError) {
+            return; // stop immediately once the parser is in the error state
+        }
+
+        ParseNext();
+    }
+    while (depth > 0);
+}
+
+void LookaheadParser::SkipValue() { // skips the pending value: one scalar token or one complete array/object
+    SkipOut(0);
+}
+
+void LookaheadParser::SkipArray() { // skips the rest of the array already entered (one enclosing level)
+    SkipOut(1);
+}
+
+void LookaheadParser::SkipObject() { // skips the rest of the object already entered (one enclosing level)
+    SkipOut(1);
+}
+
+Value* LookaheadParser::PeekValue() { // non-consuming look at the current token's value
+    if (st_ >= kHasNull && st_ <= kHasKey) { // enum range covers null, bool, number, string and key tokens
+        return &v_;
+    }
+    
+    return 0; // start/end-of-container tokens and kInit/kError carry no value
+}
+
+int LookaheadParser::PeekType() { // non-consuming: type of the pending value, or -1 when there is none
+    if (st_ >= kHasNull && st_ <= kHasKey) { // value-carrying tokens: report the type stored in v_
+        return v_.GetType();
+    }
+    
+    if (st_ == kEnteringArray) {
+        return kArrayType;
+    }
+    
+    if (st_ == kEnteringObject) {
+        return kObjectType;
+    }
+
+    return -1; // end-of-container tokens, kInit and kError
+}
+
+//-------------------------------------------------------------------------
+
+int main() { // walks a sample document key by key, printing some values and skipping the rest
+    using namespace std;
+
+    char json[] = " { \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null,"
+        "\"i\":123, \"pi\": 3.1416, \"a\":[-1, 2, 3, 4, \"array\", []], \"skipArrays\":[1, 2, [[[3]]]], "
+        "\"skipObject\":{ \"i\":0, \"t\":true, \"n\":null, \"d\":123.45 }, "
+        "\"skipNested\":[[[[{\"\":0}, {\"\":[-9.87]}]]], [], []], "
+        "\"skipString\":\"zzz\", \"reachedEnd\":null, \"t\":true }";
+
+    LookaheadParser r(json); // parses with kParseInsituFlag, hence the writable char array above
+    
+    RAPIDJSON_ASSERT(r.PeekType() == kObjectType);
+
+    r.EnterObject();
+    while (const char* key = r.NextObjectKey()) { // loop ends when NextObjectKey() returns null
+        if (0 == strcmp(key, "hello")) {
+            RAPIDJSON_ASSERT(r.PeekType() == kStringType);
+            cout << key << ":" << r.GetString() << endl;
+        }
+        else if (0 == strcmp(key, "t") || 0 == strcmp(key, "f")) {
+            RAPIDJSON_ASSERT(r.PeekType() == kTrueType || r.PeekType() == kFalseType);
+            cout << key << ":" << r.GetBool() << endl;
+            continue;
+        }
+        else if (0 == strcmp(key, "n")) {
+            RAPIDJSON_ASSERT(r.PeekType() == kNullType);
+            r.GetNull();
+            cout << key << endl;
+            continue;
+        }
+        else if (0 == strcmp(key, "pi")) {
+            RAPIDJSON_ASSERT(r.PeekType() == kNumberType);
+            cout << key << ":" << r.GetDouble() << endl;
+            continue;
+        }
+        else if (0 == strcmp(key, "a")) {
+            RAPIDJSON_ASSERT(r.PeekType() == kArrayType);
+            
+            r.EnterArray();
+            
+            cout << key << ":[ ";
+            while (r.NextArrayValue()) {
+                if (r.PeekType() == kNumberType) {
+                    cout << r.GetDouble() << " ";
+                }
+                else if (r.PeekType() == kStringType) {
+                    cout << r.GetString() << " ";
+                }
+                else {
+                    r.SkipArray(); // any other element kind: skip everything up to the end of "a"
+                    break;
+                }
+            }
+            
+            cout << "]" << endl;
+        }
+        else {
+            cout << key << ":skipped" << endl; // also reached for "i": no branch above handles it
+            r.SkipValue();
+        }
+    }
+    
+    return 0;
+}
+
+RAPIDJSON_DIAG_POP
diff --git a/example/simplepullreader/simplepullreader.cpp b/example/simplepullreader/simplepullreader.cpp
new file mode 100644
index 000000000..a4fb1161a
--- /dev/null
+++ b/example/simplepullreader/simplepullreader.cpp
@@ -0,0 +1,53 @@
+#include "rapidjson/reader.h"
+#include <iostream>
+#include <sstream>
+
+using namespace rapidjson;
+using namespace std;
+
+// Formats x through operator<< on a std::stringstream. If you can require C++11, you could use std::to_string here
+template <typename T> std::string stringify(T x) {
+    std::stringstream ss;
+    ss << x;
+    return ss.str();
+}
+
+struct MyHandler { // SAX handler that stores a printable label and text for the most recent event
+    const char* type; // label of the last event; null until the first callback runs
+    std::string data; // text of the last event; empty for events without a payload
+    
+    MyHandler() : type(), data() {}
+
+    bool Null() { type = "Null"; data.clear(); return true; }
+    bool Bool(bool b) { type = "Bool:"; data = b? "true": "false"; return true; }
+    bool Int(int i) { type = "Int:"; data = stringify(i); return true; }
+    bool Uint(unsigned u) { type = "Uint:"; data = stringify(u); return true; }
+    bool Int64(int64_t i) { type = "Int64:"; data = stringify(i); return true; }
+    bool Uint64(uint64_t u) { type = "Uint64:"; data = stringify(u); return true; }
+    bool Double(double d) { type = "Double:"; data = stringify(d); return true; }
+    bool RawNumber(const char* str, SizeType length, bool) { type = "Number:"; data = std::string(str, length); return true; }
+    bool String(const char* str, SizeType length, bool) { type = "String:"; data = std::string(str, length); return true; }
+    bool StartObject() { type = "StartObject"; data.clear(); return true; }
+    bool Key(const char* str, SizeType length, bool) { type = "Key:"; data = std::string(str, length); return true; }
+    bool EndObject(SizeType memberCount) { type = "EndObject:"; data = stringify(memberCount); return true; }
+    bool StartArray() { type = "StartArray"; data.clear(); return true; }
+    bool EndArray(SizeType elementCount) { type = "EndArray:"; data = stringify(elementCount); return true; }
+private: // copy operations are declared private and given no body here, which forbids copying
+    MyHandler(const MyHandler& noCopyConstruction);
+    MyHandler& operator=(const MyHandler& noAssignment);
+};
+
+// Pulls one token at a time and prints each event; returns 1 on a parse error.
+int main() {
+    const char json[] = " { \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3, 4] } ";
+    MyHandler handler;
+    Reader reader;
+    StringStream ss(json);
+    reader.IterativeParseInit();
+    while (!reader.IterativeParseComplete()) {
+        if (!reader.IterativeParseNext<kParseDefaultFlags>(ss, handler))
+            return 1; // parse error: the handler holds stale (or no) data, so do not print it
+        cout << handler.type << handler.data << endl;
+    }
+    return 0;
+}
diff --git a/include/rapidjson/allocators.h b/include/rapidjson/allocators.h
index 98affe03f..655f4a385 100644
--- a/include/rapidjson/allocators.h
+++ b/include/rapidjson/allocators.h
@@ -236,7 +236,7 @@ class MemoryPoolAllocator {
     */
     bool AddChunk(size_t capacity) {
         if (!baseAllocator_)
-            ownBaseAllocator_ = baseAllocator_ = RAPIDJSON_NEW(BaseAllocator());
+            ownBaseAllocator_ = baseAllocator_ = RAPIDJSON_NEW(BaseAllocator)();
         if (ChunkHeader* chunk = reinterpret_cast<ChunkHeader*>(baseAllocator_->Malloc(RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + capacity))) {
             chunk->capacity = capacity;
             chunk->size = 0;
diff --git a/include/rapidjson/document.h b/include/rapidjson/document.h
index f4dd25c40..3169bd487 100644
--- a/include/rapidjson/document.h
+++ b/include/rapidjson/document.h
@@ -29,6 +29,14 @@ RAPIDJSON_DIAG_PUSH
 #ifdef _MSC_VER
 RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
 RAPIDJSON_DIAG_OFF(4244) // conversion from kXxxFlags to 'uint16_t', possible loss of data
+#ifdef _MINWINDEF_       // see: http://stackoverflow.com/questions/22744262/cant-call-stdmax-because-minwindef-h-defines-max
+#ifndef NOMINMAX
+#pragma push_macro("min")
+#pragma push_macro("max")
+#undef min
+#undef max
+#endif
+#endif
 #endif
 
 #ifdef __clang__
@@ -300,7 +308,7 @@ struct GenericStringRef {
      */
 #endif
     explicit GenericStringRef(const CharType* str)
-        : s(str), length(internal::StrLen(str)){ RAPIDJSON_ASSERT(s != 0); }
+        : s(str), length(NotNullStrLen(str)) {}
 
     //! Create constant string reference from pointer and length
 #ifndef __clang__ // -Wdocumentation
@@ -312,7 +320,7 @@ struct GenericStringRef {
      */
 #endif
     GenericStringRef(const CharType* str, SizeType len)
-        : s(str), length(len) { RAPIDJSON_ASSERT(s != 0); }
+        : s(RAPIDJSON_LIKELY(str) ? str : emptyString), length(len) { RAPIDJSON_ASSERT(str != 0 || len == 0u); }
 
     GenericStringRef(const GenericStringRef& rhs) : s(rhs.s), length(rhs.length) {}
 
@@ -323,6 +331,14 @@ struct GenericStringRef {
     const SizeType length; //!< length of the string (excluding the trailing NULL terminator)
 
 private:
+    SizeType NotNullStrLen(const CharType* str) { // asserts that str is non-null before measuring its length
+        RAPIDJSON_ASSERT(str != 0);
+        return internal::StrLen(str);
+    }
+
+    /// Empty string - used when passing in a NULL pointer
+    static const Ch emptyString[];
+
     //! Disallow construction from non-const array
     template<SizeType N>
     GenericStringRef(CharType (&str)[N]) /* = delete */;
@@ -330,6 +346,9 @@ struct GenericStringRef {
     GenericStringRef& operator=(const GenericStringRef& rhs) /* = delete */;
 };
 
+template<typename CharType>
+const CharType GenericStringRef<CharType>::emptyString[] = { CharType() };
+
 //! Mark a character pointer as constant string
 /*! Mark a plain character pointer as a "string literal".  This function
     can be used to avoid copying a character string to be referenced as a
@@ -344,7 +363,7 @@ struct GenericStringRef {
 */
 template<typename CharType>
 inline GenericStringRef<CharType> StringRef(const CharType* str) {
-    return GenericStringRef<CharType>(str, internal::StrLen(str));
+    return GenericStringRef<CharType>(str);
 }
 
 //! Mark a character pointer as constant string
@@ -507,7 +526,7 @@ struct TypeHelper<ValueType, typename ValueType::Object> {
     static bool Is(const ValueType& v) { return v.IsObject(); }
     static ObjectType Get(ValueType& v) { return v.GetObject(); }
     static ValueType& Set(ValueType& v, ObjectType data) { return v = data; }
-    static ValueType& Set(ValueType& v, ObjectType data, typename ValueType::AllocatorType&) { v = data; }
+    static ValueType& Set(ValueType& v, ObjectType data, typename ValueType::AllocatorType&) { return v = data; }
 };
 
 template<typename ValueType> 
@@ -607,10 +626,50 @@ class GenericValue {
         \tparam SourceAllocator allocator of \c rhs
         \param rhs Value to copy from (read-only)
         \param allocator Allocator for allocating copied elements and buffers. Commonly use GenericDocument::GetAllocator().
+        \param copyConstStrings Force copying of constant strings (e.g. referencing an in-situ buffer)
         \see CopyFrom()
     */
-    template< typename SourceAllocator >
-    GenericValue(const GenericValue<Encoding, SourceAllocator>& rhs, Allocator & allocator);
+    template <typename SourceAllocator>
+    GenericValue(const GenericValue<Encoding,SourceAllocator>& rhs, Allocator& allocator, bool copyConstStrings = false) {
+        switch (rhs.GetType()) {
+        case kObjectType: { // deep copy: clone each member's name and value into a new member array
+                SizeType count = rhs.data_.o.size;
+                Member* lm = reinterpret_cast<Member*>(allocator.Malloc(count * sizeof(Member)));
+                const typename GenericValue<Encoding,SourceAllocator>::Member* rm = rhs.GetMembersPointer();
+                for (SizeType i = 0; i < count; i++) {
+                    new (&lm[i].name) GenericValue(rm[i].name, allocator, copyConstStrings); // placement-new into the raw storage
+                    new (&lm[i].value) GenericValue(rm[i].value, allocator, copyConstStrings);
+                }
+                data_.f.flags = kObjectFlag;
+                data_.o.size = data_.o.capacity = count; // capacity is exactly the number of copied members
+                SetMembersPointer(lm);
+            }
+            break;
+        case kArrayType: { // deep copy: clone each element into a new element array
+                SizeType count = rhs.data_.a.size;
+                GenericValue* le = reinterpret_cast<GenericValue*>(allocator.Malloc(count * sizeof(GenericValue)));
+                const GenericValue<Encoding,SourceAllocator>* re = rhs.GetElementsPointer();
+                for (SizeType i = 0; i < count; i++)
+                    new (&le[i]) GenericValue(re[i], allocator, copyConstStrings);
+                data_.f.flags = kArrayFlag;
+                data_.a.size = data_.a.capacity = count; // capacity is exactly the number of copied elements
+                SetElementsPointer(le);
+            }
+            break;
+        case kStringType:
+            if (rhs.data_.f.flags == kConstStringFlag && !copyConstStrings) { // constant string and no forced copy: take rhs's data as-is
+                data_.f.flags = rhs.data_.f.flags;
+                data_  = *reinterpret_cast<const Data*>(&rhs.data_);
+            }
+            else
+                SetStringRaw(StringRef(rhs.GetString(), rhs.GetStringLength()), allocator); // otherwise build the string through allocator
+            break;
+        default: // every remaining type: take rhs's raw data unchanged
+            data_.f.flags = rhs.data_.f.flags;
+            data_  = *reinterpret_cast<const Data*>(&rhs.data_);
+            break;
+        }
+    }
 
     //! Constructor for boolean value.
     /*! \param b Boolean value
@@ -803,12 +862,13 @@ class GenericValue {
         \tparam SourceAllocator Allocator type of \c rhs
         \param rhs Value to copy from (read-only)
         \param allocator Allocator to use for copying
+        \param copyConstStrings Force copying of constant strings (e.g. referencing an in-situ buffer)
      */
     template <typename SourceAllocator>
-    GenericValue& CopyFrom(const GenericValue<Encoding, SourceAllocator>& rhs, Allocator& allocator) {
+    GenericValue& CopyFrom(const GenericValue<Encoding, SourceAllocator>& rhs, Allocator& allocator, bool copyConstStrings = false) {
         RAPIDJSON_ASSERT(static_cast<void*>(this) != static_cast<void const*>(&rhs));
         this->~GenericValue();
-        new (this) GenericValue(rhs, allocator);
+        new (this) GenericValue(rhs, allocator, copyConstStrings);
         return *this;
     }
 
@@ -1713,7 +1773,7 @@ class GenericValue {
         \return The value itself for fluent API.
         \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length
     */
-    GenericValue& SetString(const Ch* s, SizeType length, Allocator& allocator) { this->~GenericValue(); SetStringRaw(StringRef(s, length), allocator); return *this; }
+    GenericValue& SetString(const Ch* s, SizeType length, Allocator& allocator) { return SetString(StringRef(s, length), allocator); }
 
     //! Set this value as a string by copying from source string.
     /*! \param s source string. 
@@ -1721,7 +1781,15 @@ class GenericValue {
         \return The value itself for fluent API.
         \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length
     */
-    GenericValue& SetString(const Ch* s, Allocator& allocator) { return SetString(s, internal::StrLen(s), allocator); }
+    GenericValue& SetString(const Ch* s, Allocator& allocator) { return SetString(StringRef(s), allocator); }
+
+    //! Set this value as a string by copying from source string.
+    /*! \param s source string reference
+        \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \post IsString() == true && GetString() != s.s && strcmp(GetString(),s.s) == 0 && GetStringLength() == s.length
+    */
+    GenericValue& SetString(StringRefType s, Allocator& allocator) { this->~GenericValue(); SetStringRaw(s, allocator); return *this; }
 
 #if RAPIDJSON_HAS_STDSTRING
     //! Set this value as a string by copying from source string.
@@ -1731,7 +1799,7 @@ class GenericValue {
         \post IsString() == true && GetString() != s.data() && strcmp(GetString(),s.data() == 0 && GetStringLength() == s.size()
         \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
     */
-    GenericValue& SetString(const std::basic_string<Ch>& s, Allocator& allocator) { return SetString(s.data(), SizeType(s.size()), allocator); }
+    GenericValue& SetString(const std::basic_string<Ch>& s, Allocator& allocator) { return SetString(StringRef(s), allocator); }
 #endif
 
     //@}
@@ -2041,7 +2109,7 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
         GenericValue<Encoding, Allocator>(type),  allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_()
     {
         if (!allocator_)
-            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator());
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
     }
 
     //! Constructor
@@ -2054,7 +2122,7 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
         allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_()
     {
         if (!allocator_)
-            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator());
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
     }
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
@@ -2246,7 +2314,7 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
     template <unsigned parseFlags, typename SourceEncoding>
     GenericDocument& Parse(const typename SourceEncoding::Ch* str, size_t length) {
         RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));
-        MemoryStream ms(static_cast<const char*>(str), length * sizeof(typename SourceEncoding::Ch));
+        MemoryStream ms(reinterpret_cast<const char*>(str), length * sizeof(typename SourceEncoding::Ch));
         EncodedInputStream<SourceEncoding, MemoryStream> is(ms);
         ParseStream<parseFlags, SourceEncoding>(is);
         return *this;
@@ -2404,35 +2472,6 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
 //! GenericDocument with UTF8 encoding
 typedef GenericDocument<UTF8<> > Document;
 
-// defined here due to the dependency on GenericDocument
-template <typename Encoding, typename Allocator>
-template <typename SourceAllocator>
-inline
-GenericValue<Encoding,Allocator>::GenericValue(const GenericValue<Encoding,SourceAllocator>& rhs, Allocator& allocator)
-{
-    switch (rhs.GetType()) {
-    case kObjectType:
-    case kArrayType: { // perform deep copy via SAX Handler
-            GenericDocument<Encoding,Allocator> d(&allocator);
-            rhs.Accept(d);
-            RawAssign(*d.stack_.template Pop<GenericValue>(1));
-        }
-        break;
-    case kStringType:
-        if (rhs.data_.f.flags == kConstStringFlag) {
-            data_.f.flags = rhs.data_.f.flags;
-            data_  = *reinterpret_cast<const Data*>(&rhs.data_);
-        } else {
-            SetStringRaw(StringRef(rhs.GetString(), rhs.GetStringLength()), allocator);
-        }
-        break;
-    default:
-        data_.f.flags = rhs.data_.f.flags;
-        data_  = *reinterpret_cast<const Data*>(&rhs.data_);
-        break;
-    }
-}
-
 //! Helper class for accessing Value of array type.
 /*!
     Instance of this helper class is obtained by \c GenericValue::GetArray().
@@ -2546,7 +2585,7 @@ class GenericObject {
     GenericObject AddMember(StringRefType name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
     GenericObject AddMember(StringRefType name, StringRefType value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
     template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericObject)) AddMember(StringRefType name, T value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
-    void RemoveAllMembers() { return value_.RemoveAllMembers(); }
+    void RemoveAllMembers() { value_.RemoveAllMembers(); }
     bool RemoveMember(const Ch* name) const { return value_.RemoveMember(name); }
 #if RAPIDJSON_HAS_STDSTRING
     bool RemoveMember(const std::basic_string<Ch>& name) const { return value_.RemoveMember(name); }
@@ -2573,6 +2612,12 @@ class GenericObject {
 };
 
 RAPIDJSON_NAMESPACE_END
+#ifdef _MINWINDEF_       // see: http://stackoverflow.com/questions/22744262/cant-call-stdmax-because-minwindef-h-defines-max
+#ifndef NOMINMAX
+#pragma pop_macro("min")
+#pragma pop_macro("max")
+#endif
+#endif
 RAPIDJSON_DIAG_POP
 
 #endif // RAPIDJSON_DOCUMENT_H_
diff --git a/include/rapidjson/encodedstream.h b/include/rapidjson/encodedstream.h
index 145068386..223601c05 100644
--- a/include/rapidjson/encodedstream.h
+++ b/include/rapidjson/encodedstream.h
@@ -200,7 +200,7 @@ class AutoUTFInputStream {
         // xx xx xx xx  UTF-8
 
         if (!hasBOM_) {
-            unsigned pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
+            int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
             switch (pattern) {
             case 0x08: type_ = kUTF32BE; break;
             case 0x0A: type_ = kUTF16BE; break;
diff --git a/include/rapidjson/encodings.h b/include/rapidjson/encodings.h
index baa7c2b17..0df1c3435 100644
--- a/include/rapidjson/encodings.h
+++ b/include/rapidjson/encodings.h
@@ -157,7 +157,7 @@ struct UTF8 {
         if (type >= 32) {
             *codepoint = 0;
         } else {
-            *codepoint = (0xFF >> type) & static_cast<unsigned char>(c);
+            *codepoint = (0xFFu >> type) & static_cast<unsigned char>(c);
         }
         bool result = true;
         switch (type) {
@@ -283,7 +283,7 @@ struct UTF16 {
             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
             unsigned v = codepoint - 0x10000;
             os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
-            os.Put((v & 0x3FF) | 0xDC00);
+            os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
         }
     }
 
@@ -299,7 +299,7 @@ struct UTF16 {
             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
             unsigned v = codepoint - 0x10000;
             PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
-            PutUnsafe(os, (v & 0x3FF) | 0xDC00);
+            PutUnsafe(os, static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
         }
     }
 
@@ -620,28 +620,28 @@ struct AutoUTF {
 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
 
     template<typename OutputStream>
-    RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) {
+    static RAPIDJSON_FORCEINLINE void Encode(OutputStream& os, unsigned codepoint) {
         typedef void (*EncodeFunc)(OutputStream&, unsigned);
         static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) };
         (*f[os.GetType()])(os, codepoint);
     }
 
     template<typename OutputStream>
-    RAPIDJSON_FORCEINLINE static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
+    static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
         typedef void (*EncodeFunc)(OutputStream&, unsigned);
         static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe) };
         (*f[os.GetType()])(os, codepoint);
     }
 
     template <typename InputStream>
-    RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) {
+    static RAPIDJSON_FORCEINLINE bool Decode(InputStream& is, unsigned* codepoint) {
         typedef bool (*DecodeFunc)(InputStream&, unsigned*);
         static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) };
         return (*f[is.GetType()])(is, codepoint);
     }
 
     template <typename InputStream, typename OutputStream>
-    RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
+    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) {
         typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
         static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) };
         return (*f[is.GetType()])(is, os);
@@ -658,7 +658,7 @@ template<typename SourceEncoding, typename TargetEncoding>
 struct Transcoder {
     //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
     template<typename InputStream, typename OutputStream>
-    RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) {
+    static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) {
         unsigned codepoint;
         if (!SourceEncoding::Decode(is, &codepoint))
             return false;
@@ -667,7 +667,7 @@ struct Transcoder {
     }
 
     template<typename InputStream, typename OutputStream>
-    RAPIDJSON_FORCEINLINE static bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
+    static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
         unsigned codepoint;
         if (!SourceEncoding::Decode(is, &codepoint))
             return false;
@@ -677,7 +677,7 @@ struct Transcoder {
 
     //! Validate one Unicode codepoint from an encoded stream.
     template<typename InputStream, typename OutputStream>
-    RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
+    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) {
         return Transcode(is, os);   // Since source/target encoding is different, must transcode.
     }
 };
@@ -690,19 +690,19 @@ inline void PutUnsafe(Stream& stream, typename Stream::Ch c);
 template<typename Encoding>
 struct Transcoder<Encoding, Encoding> {
     template<typename InputStream, typename OutputStream>
-    RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) {
+    static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) {
         os.Put(is.Take());  // Just copy one code unit. This semantic is different from primary template class.
         return true;
     }
     
     template<typename InputStream, typename OutputStream>
-    RAPIDJSON_FORCEINLINE static bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
+    static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
         PutUnsafe(os, is.Take());  // Just copy one code unit. This semantic is different from primary template class.
         return true;
     }
     
     template<typename InputStream, typename OutputStream>
-    RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
+    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) {
         return Encoding::Validate(is, os);  // source/target encoding are the same
     }
 };
diff --git a/include/rapidjson/error/error.h b/include/rapidjson/error/error.h
index 95cb31a72..9311d2f03 100644
--- a/include/rapidjson/error/error.h
+++ b/include/rapidjson/error/error.h
@@ -104,6 +104,8 @@ enum ParseErrorCode {
     \see GenericReader::Parse, GenericDocument::Parse
 */
 struct ParseResult {
+    //! Unspecified boolean type
+    typedef bool (ParseResult::*BooleanType)() const;
 public:
     //! Default constructor, no error.
     ParseResult() : code_(kParseErrorNone), offset_(0) {}
@@ -115,8 +117,8 @@ struct ParseResult {
     //! Get the error offset, if \ref IsError(), 0 otherwise.
     size_t Offset() const { return offset_; }
 
-    //! Conversion to \c bool, returns \c true, iff !\ref IsError().
-    operator bool() const { return !IsError(); }
+    //! Conversion to an unspecified boolean type (not to \c bool itself); non-null, i.e. \c true, iff !\ref IsError().
+    operator BooleanType() const { return !IsError() ? &ParseResult::IsError : NULL; }
     //! Whether the result is an error.
     bool IsError() const { return code_ != kParseErrorNone; }
 
@@ -124,6 +126,10 @@ struct ParseResult {
     bool operator==(ParseErrorCode code) const { return code_ == code; }
     friend bool operator==(ParseErrorCode code, const ParseResult & err) { return code == err.code_; }
 
+    bool operator!=(const ParseResult& that) const { return !(*this == that); }
+    bool operator!=(ParseErrorCode code) const { return !(*this == code); }
+    friend bool operator!=(ParseErrorCode code, const ParseResult & err) { return err != code; }
+
     //! Reset error code.
     void Clear() { Set(kParseErrorNone); }
     //! Update error code and offset.
diff --git a/include/rapidjson/internal/diyfp.h b/include/rapidjson/internal/diyfp.h
index c9fefdc61..29abf8046 100644
--- a/include/rapidjson/internal/diyfp.h
+++ b/include/rapidjson/internal/diyfp.h
@@ -21,7 +21,7 @@
 
 #include "../rapidjson.h"
 
-#if defined(_MSC_VER) && defined(_M_AMD64)
+#if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
 #include <intrin.h>
 #pragma intrinsic(_BitScanReverse64)
 #pragma intrinsic(_umul128)
diff --git a/include/rapidjson/internal/dtoa.h b/include/rapidjson/internal/dtoa.h
index 8d6350e62..bf2e9b2e5 100644
--- a/include/rapidjson/internal/dtoa.h
+++ b/include/rapidjson/internal/dtoa.h
@@ -41,7 +41,7 @@ inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uin
     }
 }
 
-inline unsigned CountDecimalDigit32(uint32_t n) {
+inline int CountDecimalDigit32(uint32_t n) {
     // Simple pure C++ implementation was faster than __builtin_clz version in this situation.
     if (n < 10) return 1;
     if (n < 100) return 2;
@@ -63,7 +63,7 @@ inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buff
     const DiyFp wp_w = Mp - W;
     uint32_t p1 = static_cast<uint32_t>(Mp.f >> -one.e);
     uint64_t p2 = Mp.f & (one.f - 1);
-    unsigned kappa = CountDecimalDigit32(p1); // kappa in [0, 9]
+    int kappa = CountDecimalDigit32(p1); // kappa in [0, 9]
     *len = 0;
 
     while (kappa > 0) {
@@ -102,8 +102,8 @@ inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buff
         kappa--;
         if (p2 < delta) {
             *K += kappa;
-            int index = -static_cast<int>(kappa);
-            GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * (index < 9 ? kPow10[-static_cast<int>(kappa)] : 0));
+            int index = -kappa;
+            GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * (index < 9 ? kPow10[index] : 0));
             return;
         }
     }
diff --git a/include/rapidjson/internal/ieee754.h b/include/rapidjson/internal/ieee754.h
index 82bb0b99e..c2684ba2a 100644
--- a/include/rapidjson/internal/ieee754.h
+++ b/include/rapidjson/internal/ieee754.h
@@ -48,13 +48,13 @@ class Double {
     int IntegerExponent() const { return (IsNormal() ? Exponent() : kDenormalExponent) - kSignificandSize; }
     uint64_t ToBias() const { return (u_ & kSignMask) ? ~u_ + 1 : u_ | kSignMask; }
 
-    static unsigned EffectiveSignificandSize(int order) {
+    static int EffectiveSignificandSize(int order) {
         if (order >= -1021)
             return 53;
         else if (order <= -1074)
             return 0;
         else
-            return static_cast<unsigned>(order) + 1074;
+            return order + 1074;
     }
 
 private:
diff --git a/include/rapidjson/internal/regex.h b/include/rapidjson/internal/regex.h
index 8530cd771..e1a2faae5 100644
--- a/include/rapidjson/internal/regex.h
+++ b/include/rapidjson/internal/regex.h
@@ -29,6 +29,9 @@ RAPIDJSON_DIAG_OFF(implicit-fallthrough)
 #ifdef __GNUC__
 RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(effc++)
+#if __GNUC__ >= 7
+RAPIDJSON_DIAG_OFF(implicit-fallthrough)
+#endif
 #endif
 
 #ifdef _MSC_VER
@@ -606,7 +609,7 @@ class GenericRegexSearch {
     {
         RAPIDJSON_ASSERT(regex_.IsValid());
         if (!allocator_)
-            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator());
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
         stateSet_ = static_cast<unsigned*>(allocator_->Malloc(GetStateSetSize()));
         state0_.template Reserve<SizeType>(regex_.stateCount_);
         state1_.template Reserve<SizeType>(regex_.stateCount_);
@@ -688,8 +691,8 @@ class GenericRegexSearch {
             bool matched = AddState(l, s.out);
             return AddState(l, s.out1) || matched;
         }
-        else if (!(stateSet_[index >> 5] & (1 << (index & 31)))) {
-            stateSet_[index >> 5] |= (1 << (index & 31));
+        else if (!(stateSet_[index >> 5] & (1u << (index & 31)))) {
+            stateSet_[index >> 5] |= (1u << (index & 31));
             *l.template PushUnsafe<SizeType>() = index;
         }
         return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not validated due to reallocation.
diff --git a/include/rapidjson/internal/stack.h b/include/rapidjson/internal/stack.h
index 022c9aab4..5c5398c35 100644
--- a/include/rapidjson/internal/stack.h
+++ b/include/rapidjson/internal/stack.h
@@ -126,6 +126,7 @@ class Stack {
 
     template<typename T>
     RAPIDJSON_FORCEINLINE T* PushUnsafe(size_t count = 1) {
+        RAPIDJSON_ASSERT(stackTop_);
         RAPIDJSON_ASSERT(stackTop_ + sizeof(T) * count <= stackEnd_);
         T* ret = reinterpret_cast<T*>(stackTop_);
         stackTop_ += sizeof(T) * count;
@@ -183,7 +184,7 @@ class Stack {
         size_t newCapacity;
         if (stack_ == 0) {
             if (!allocator_)
-                ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator());
+                ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
             newCapacity = initialCapacity_;
         } else {
             newCapacity = GetCapacity();
diff --git a/include/rapidjson/internal/strfunc.h b/include/rapidjson/internal/strfunc.h
index de41d8f9c..226439a76 100644
--- a/include/rapidjson/internal/strfunc.h
+++ b/include/rapidjson/internal/strfunc.h
@@ -16,6 +16,7 @@
 #define RAPIDJSON_INTERNAL_STRFUNC_H_
 
 #include "../stream.h"
+#include <cwchar>
 
 RAPIDJSON_NAMESPACE_BEGIN
 namespace internal {
@@ -34,6 +35,16 @@ inline SizeType StrLen(const Ch* s) {
     return SizeType(p - s);
 }
 
+template <>
+inline SizeType StrLen(const char* s) {
+    return SizeType(std::strlen(s)); // char specialization: use the C library. NOTE(review): std::strlen lives in <cstring>; presumably reached via ../stream.h - confirm
+}
+
+template <>
+inline SizeType StrLen(const wchar_t* s) {
+    return SizeType(std::wcslen(s)); // wchar_t specialization: std::wcslen from <cwchar>
+}
+
 //! Returns number of code points in a encoded string.
 template<typename Encoding>
 bool CountStringCodePoint(const typename Encoding::Ch* s, SizeType length, SizeType* outCount) {
diff --git a/include/rapidjson/internal/strtod.h b/include/rapidjson/internal/strtod.h
index 289c413b0..adf49e349 100644
--- a/include/rapidjson/internal/strtod.h
+++ b/include/rapidjson/internal/strtod.h
@@ -140,8 +140,8 @@ inline bool StrtodDiyFp(const char* decimals, size_t length, size_t decimalPosit
         significand++;
 
     size_t remaining = length - i;
-    const unsigned kUlpShift = 3;
-    const unsigned kUlp = 1 << kUlpShift;
+    const int kUlpShift = 3;
+    const int kUlp = 1 << kUlpShift;
     int64_t error = (remaining == 0) ? 0 : kUlp / 2;
 
     DiyFp v(significand, 0);
@@ -177,17 +177,17 @@ inline bool StrtodDiyFp(const char* decimals, size_t length, size_t decimalPosit
     v = v.Normalize();
     error <<= oldExp - v.e;
 
-    const unsigned effectiveSignificandSize = Double::EffectiveSignificandSize(64 + v.e);
-    unsigned precisionSize = 64 - effectiveSignificandSize;
+    const int effectiveSignificandSize = Double::EffectiveSignificandSize(64 + v.e);
+    int precisionSize = 64 - effectiveSignificandSize;
     if (precisionSize + kUlpShift >= 64) {
-        unsigned scaleExp = (precisionSize + kUlpShift) - 63;
+        int scaleExp = (precisionSize + kUlpShift) - 63;
         v.f >>= scaleExp;
         v.e += scaleExp; 
-        error = (error >> scaleExp) + 1 + static_cast<int>(kUlp);
+        error = (error >> scaleExp) + 1 + kUlp;
         precisionSize -= scaleExp;
     }
 
-    DiyFp rounded(v.f >> precisionSize, v.e + static_cast<int>(precisionSize));
+    DiyFp rounded(v.f >> precisionSize, v.e + precisionSize);
     const uint64_t precisionBits = (v.f & ((uint64_t(1) << precisionSize) - 1)) * kUlp;
     const uint64_t halfWay = (uint64_t(1) << (precisionSize - 1)) * kUlp;
     if (precisionBits >= halfWay + static_cast<unsigned>(error)) {
diff --git a/include/rapidjson/istreamwrapper.h b/include/rapidjson/istreamwrapper.h
index f5fe28977..8639c8c3c 100644
--- a/include/rapidjson/istreamwrapper.h
+++ b/include/rapidjson/istreamwrapper.h
@@ -54,7 +54,7 @@ class BasicIStreamWrapper {
 
     Ch Peek() const { 
         typename StreamType::int_type c = stream_.peek();
-        return RAPIDJSON_LIKELY(c != StreamType::traits_type::eof()) ? static_cast<Ch>(c) : '\0';
+        return RAPIDJSON_LIKELY(c != StreamType::traits_type::eof()) ? static_cast<Ch>(c) : static_cast<Ch>('\0');
     }
 
     Ch Take() { 
diff --git a/include/rapidjson/pointer.h b/include/rapidjson/pointer.h
index 0206ac1c8..0f377efec 100644
--- a/include/rapidjson/pointer.h
+++ b/include/rapidjson/pointer.h
@@ -240,7 +240,7 @@ class GenericPointer {
     template <typename T>
     RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch> >), (GenericPointer))
     Append(T* name, Allocator* allocator = 0) const {
-        return Append(name, StrLen(name), allocator);
+        return Append(name, internal::StrLen(name), allocator);
     }
 
 #if RAPIDJSON_HAS_STDSTRING
@@ -274,7 +274,7 @@ class GenericPointer {
         else {
             Ch name[21];
             for (size_t i = 0; i <= length; i++)
-                name[i] = buffer[i];
+                name[i] = static_cast<Ch>(buffer[i]);
             Token token = { name, length, index };
             return Append(token, allocator);
         }
@@ -758,7 +758,7 @@ class GenericPointer {
     */
     Ch* CopyFromRaw(const GenericPointer& rhs, size_t extraToken = 0, size_t extraNameBufferSize = 0) {
         if (!allocator_) // allocator is independently owned.
-            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator());
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
 
         size_t nameBufferSize = rhs.tokenCount_; // null terminators for tokens
         for (Token *t = rhs.tokens_; t != rhs.tokens_ + rhs.tokenCount_; ++t)
@@ -806,7 +806,7 @@ class GenericPointer {
 
         // Create own allocator if user did not supply.
         if (!allocator_)
-            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator());
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
 
         // Count number of '/' as tokenCount
         tokenCount_ = 0;
@@ -1029,8 +1029,8 @@ class GenericPointer {
             unsigned char u = static_cast<unsigned char>(c);
             static const char hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
             os_.Put('%');
-            os_.Put(hexDigits[u >> 4]);
-            os_.Put(hexDigits[u & 15]);
+            os_.Put(static_cast<typename OutputStream::Ch>(hexDigits[u >> 4]));
+            os_.Put(static_cast<typename OutputStream::Ch>(hexDigits[u & 15]));
         }
     private:
         OutputStream& os_;
diff --git a/include/rapidjson/prettywriter.h b/include/rapidjson/prettywriter.h
index abd964f6f..98dfb3060 100644
--- a/include/rapidjson/prettywriter.h
+++ b/include/rapidjson/prettywriter.h
@@ -47,7 +47,7 @@ enum PrettyFormatOptions {
 template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags>
 class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator, writeFlags> {
 public:
-    typedef Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator> Base;
+    typedef Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator, writeFlags> Base;
     typedef typename Base::Ch Ch;
 
     //! Constructor
@@ -136,8 +136,10 @@ class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding,
 	
     bool EndObject(SizeType memberCount = 0) {
         (void)memberCount;
-        RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level));
-        RAPIDJSON_ASSERT(!Base::level_stack_.template Top<typename Base::Level>()->inArray);
+        RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); // not inside an Object
+        RAPIDJSON_ASSERT(!Base::level_stack_.template Top<typename Base::Level>()->inArray); // currently inside an Array, not Object
+        RAPIDJSON_ASSERT(0 == Base::level_stack_.template Top<typename Base::Level>()->valueCount % 2); // Object has a Key without a Value
+       
         bool empty = Base::level_stack_.template Pop<typename Base::Level>(1)->valueCount == 0;
 
         if (!empty) {
@@ -148,7 +150,7 @@ class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding,
         (void)ret;
         RAPIDJSON_ASSERT(ret == true);
         if (Base::level_stack_.Empty()) // end of json text
-            Base::os_->Flush();
+            Base::Flush();
         return true;
     }
 
@@ -172,7 +174,7 @@ class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding,
         (void)ret;
         RAPIDJSON_ASSERT(ret == true);
         if (Base::level_stack_.Empty()) // end of json text
-            Base::os_->Flush();
+            Base::Flush();
         return true;
     }
 
@@ -249,7 +251,7 @@ class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding,
 
     void WriteIndent()  {
         size_t count = (Base::level_stack_.GetSize() / sizeof(typename Base::Level)) * indentCharCount_;
-        PutN(*Base::os_, static_cast<typename TargetEncoding::Ch>(indentChar_), count);
+        PutN(*Base::os_, static_cast<typename OutputStream::Ch>(indentChar_), count);
     }
 
     Ch indentChar_;
diff --git a/include/rapidjson/rapidjson.h b/include/rapidjson/rapidjson.h
index 053b2ce43..57ab8514d 100644
--- a/include/rapidjson/rapidjson.h
+++ b/include/rapidjson/rapidjson.h
@@ -49,6 +49,11 @@
 // token stringification
 #define RAPIDJSON_STRINGIFY(x) RAPIDJSON_DO_STRINGIFY(x)
 #define RAPIDJSON_DO_STRINGIFY(x) #x
+
+// token concatenation
+#define RAPIDJSON_JOIN(X, Y) RAPIDJSON_DO_JOIN(X, Y)
+#define RAPIDJSON_DO_JOIN(X, Y) RAPIDJSON_DO_JOIN2(X, Y)
+#define RAPIDJSON_DO_JOIN2(X, Y) X##Y
 //!@endcond
 
 /*! \def RAPIDJSON_MAJOR_VERSION
@@ -320,17 +325,17 @@
 #endif
 
 ///////////////////////////////////////////////////////////////////////////////
-// RAPIDJSON_SSE2/RAPIDJSON_SSE42/RAPIDJSON_SIMD
+// RAPIDJSON_SSE2/RAPIDJSON_SSE42/RAPIDJSON_NEON/RAPIDJSON_SIMD
 
 /*! \def RAPIDJSON_SIMD
     \ingroup RAPIDJSON_CONFIG
-    \brief Enable SSE2/SSE4.2 optimization.
+    \brief Enable SSE2/SSE4.2/Neon optimization.
 
     RapidJSON supports optimized implementations for some parsing operations
-    based on the SSE2 or SSE4.2 SIMD extensions on modern Intel-compatible
-    processors.
+    based on the SSE2, SSE4.2 or Neon SIMD extensions on modern Intel
+    or ARM compatible processors.
 
-    To enable these optimizations, two different symbols can be defined;
+    To enable these optimizations, three different symbols can be defined;
     \code
     // Enable SSE2 optimization.
     #define RAPIDJSON_SSE2
@@ -339,13 +344,17 @@
     #define RAPIDJSON_SSE42
     \endcode
 
-    \c RAPIDJSON_SSE42 takes precedence, if both are defined.
+    \code
+    #define RAPIDJSON_NEON  // Enable ARM Neon optimization.
+    \endcode
+
+    \c RAPIDJSON_SSE42 takes precedence over SSE2, if both are defined.
 
     If any of these symbols is defined, RapidJSON defines the macro
     \c RAPIDJSON_SIMD to indicate the availability of the optimized code.
 */
 #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) \
-    || defined(RAPIDJSON_DOXYGEN_RUNNING)
+    || defined(RAPIDJSON_NEON) || defined(RAPIDJSON_DOXYGEN_RUNNING)
 #define RAPIDJSON_SIMD
 #endif
 
@@ -405,7 +414,15 @@ RAPIDJSON_NAMESPACE_END
 ///////////////////////////////////////////////////////////////////////////////
 // RAPIDJSON_STATIC_ASSERT
 
-// Adopt from boost
+// Prefer C++11 static_assert, if available
+#ifndef RAPIDJSON_STATIC_ASSERT
+#if __cplusplus >= 201103L || ( defined(_MSC_VER) && _MSC_VER >= 1800 )
+#define RAPIDJSON_STATIC_ASSERT(x) \
+   static_assert(x, RAPIDJSON_STRINGIFY(x))
+#endif // C++11
+#endif // RAPIDJSON_STATIC_ASSERT
+
+// Adopt C++03 implementation from boost
 #ifndef RAPIDJSON_STATIC_ASSERT
 #ifndef __clang__
 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
@@ -413,13 +430,9 @@ RAPIDJSON_NAMESPACE_END
 RAPIDJSON_NAMESPACE_BEGIN
 template <bool x> struct STATIC_ASSERTION_FAILURE;
 template <> struct STATIC_ASSERTION_FAILURE<true> { enum { value = 1 }; };
-template<int x> struct StaticAssertTest {};
+template <size_t x> struct StaticAssertTest {};
 RAPIDJSON_NAMESPACE_END
 
-#define RAPIDJSON_JOIN(X, Y) RAPIDJSON_DO_JOIN(X, Y)
-#define RAPIDJSON_DO_JOIN(X, Y) RAPIDJSON_DO_JOIN2(X, Y)
-#define RAPIDJSON_DO_JOIN2(X, Y) X##Y
-
 #if defined(__GNUC__)
 #define RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE __attribute__((unused))
 #else
@@ -438,7 +451,7 @@ RAPIDJSON_NAMESPACE_END
     typedef ::RAPIDJSON_NAMESPACE::StaticAssertTest< \
       sizeof(::RAPIDJSON_NAMESPACE::STATIC_ASSERTION_FAILURE<bool(x) >)> \
     RAPIDJSON_JOIN(StaticAssertTypedef, __LINE__) RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE
-#endif
+#endif // RAPIDJSON_STATIC_ASSERT
 
 ///////////////////////////////////////////////////////////////////////////////
 // RAPIDJSON_LIKELY, RAPIDJSON_UNLIKELY
@@ -568,7 +581,7 @@ RAPIDJSON_NAMESPACE_END
 #ifndef RAPIDJSON_HAS_CXX11_RANGE_FOR
 #if defined(__clang__)
 #define RAPIDJSON_HAS_CXX11_RANGE_FOR __has_feature(cxx_range_for)
-#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,3,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \
+#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \
       (defined(_MSC_VER) && _MSC_VER >= 1700)
 #define RAPIDJSON_HAS_CXX11_RANGE_FOR 1
 #else
@@ -583,7 +596,7 @@ RAPIDJSON_NAMESPACE_END
 
 #ifndef RAPIDJSON_NEW
 ///! customization point for global \c new
-#define RAPIDJSON_NEW(x) new x
+#define RAPIDJSON_NEW(TypeName) new TypeName
 #endif
 #ifndef RAPIDJSON_DELETE
 ///! customization point for global \c delete
diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h
index a8cee364e..120c31115 100644
--- a/include/rapidjson/reader.h
+++ b/include/rapidjson/reader.h
@@ -33,6 +33,8 @@
 #include <nmmintrin.h>
 #elif defined(RAPIDJSON_SSE2)
 #include <emmintrin.h>
+#elif defined(RAPIDJSON_NEON)
+#include <arm_neon.h>
 #endif
 
 #ifdef _MSC_VER
@@ -411,7 +413,92 @@ inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
     return SkipWhitespace(p, end);
 }
 
-#endif // RAPIDJSON_SSE2
+#elif defined(RAPIDJSON_NEON)
+
+//! Skip whitespace with ARM Neon instructions, testing 16 8-bit characters at once.
+inline const char *SkipWhitespace_SIMD(const char* p) {  // no end pointer: the scan stops at the first non-whitespace byte
+    // Fast return for single non-whitespace
+    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+        ++p;
+    else
+        return p;
+
+    // Scan byte-by-byte up to the next 16-byte boundary so every vector load below starts on an aligned address
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+    while (p != nextAligned)
+        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+            ++p;
+        else
+            return p;
+
+    const uint8x16_t w0 = vmovq_n_u8(' ');    // each whitespace character broadcast to all 16 lanes
+    const uint8x16_t w1 = vmovq_n_u8('\n');
+    const uint8x16_t w2 = vmovq_n_u8('\r');
+    const uint8x16_t w3 = vmovq_n_u8('\t');
+
+    for (;; p += 16) {                         // unbounded: any non-whitespace byte (including '\0') ends the scan
+        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
+        uint8x16_t x = vceqq_u8(s, w0);        // lane = 0xFF where the byte is whitespace
+        x = vorrq_u8(x, vceqq_u8(s, w1));
+        x = vorrq_u8(x, vceqq_u8(s, w2));
+        x = vorrq_u8(x, vceqq_u8(s, w3));
+
+        x = vmvnq_u8(x);                       // Negate: lane = 0xFF where the byte is NOT whitespace
+        x = vrev64q_u8(x);                     // Reverse bytes in each 64-bit half so clz counts from p[0] (assumes little-endian lanes - TODO confirm)
+        uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract (vreinterpretq is the portable way to view u8x16 as u64x2)
+        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
+
+        if (low == 0) {                        // first 8 bytes are all whitespace
+            if (high != 0) {
+                int lz = __builtin_clzll(high); // leading zero bits / 8 = whitespace bytes before the first non-whitespace
+                return p + 8 + (lz >> 3);
+            }
+        } else {
+            int lz = __builtin_clzll(low);
+            return p + (lz >> 3);
+        }
+    }
+}
+
+inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
+    // Bounded variant of the Neon scan. Fast return for an empty range or a single non-whitespace
+    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
+        ++p;
+    else
+        return p;
+
+    const uint8x16_t w0 = vmovq_n_u8(' ');    // each whitespace character broadcast to all 16 lanes
+    const uint8x16_t w1 = vmovq_n_u8('\n');
+    const uint8x16_t w2 = vmovq_n_u8('\r');
+    const uint8x16_t w3 = vmovq_n_u8('\t');
+
+    for (; end - p >= 16; p += 16) {           // load only while a full 16-byte block lies before 'end'; avoids forming 'end - 16' on short input
+        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
+        uint8x16_t x = vceqq_u8(s, w0);        // lane = 0xFF where the byte is whitespace
+        x = vorrq_u8(x, vceqq_u8(s, w1));
+        x = vorrq_u8(x, vceqq_u8(s, w2));
+        x = vorrq_u8(x, vceqq_u8(s, w3));
+
+        x = vmvnq_u8(x);                       // Negate: lane = 0xFF where the byte is NOT whitespace
+        x = vrev64q_u8(x);                     // Reverse bytes in each 64-bit half so clz counts from p[0] (assumes little-endian lanes - TODO confirm)
+        uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract (vreinterpretq is the portable way to view u8x16 as u64x2)
+        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
+
+        if (low == 0) {                        // first 8 bytes are all whitespace
+            if (high != 0) {
+                int lz = __builtin_clzll(high); // leading zero bits / 8 = whitespace bytes before the first non-whitespace
+                return p + 8 + (lz >> 3);
+            }
+        } else {
+            int lz = __builtin_clzll(low);
+            return p + (lz >> 3);
+        }
+    }
+
+    return SkipWhitespace(p, end);             // fewer than 16 bytes left: hand the tail to SkipWhitespace(p, end)
+}
+
+#endif // RAPIDJSON_NEON
 
 #ifdef RAPIDJSON_SIMD
 //! Template function specialization for InsituStringStream
@@ -513,6 +600,83 @@ class GenericReader {
         return Parse<kParseDefaultFlags>(is, handler);
     }
 
+    //! Initialize JSON text token-by-token parsing
+    /*! Clears the stored parse result and resets the parsing state to IterativeParsingStartState.
+     */
+    void IterativeParseInit() {
+        parseResult_.Clear();
+        state_ = IterativeParsingStartState;
+    }
+    
+    //! Parse one token from JSON text
+    /*! \tparam parseFlags Parse flags; forwarded to the helpers, and kParseStopWhenDoneFlag is tested here.
+        \tparam InputStream Type of input stream, implementing Stream concept
+        \tparam Handler Type of handler, implementing Handler concept.
+        \param is Input stream to be parsed.
+        \param handler The handler to receive events.
+        \return \c false if an error was detected (HandleError() is invoked), \c true otherwise. */
+    template <unsigned parseFlags, typename InputStream, typename Handler>
+    bool IterativeParseNext(InputStream& is, Handler& handler) {
+        while (RAPIDJSON_LIKELY(is.Peek() != '\0')) {
+            SkipWhitespaceAndComments<parseFlags>(is);
+            
+            Token t = Tokenize(is.Peek());
+            IterativeParsingState n = Predict(state_, t);
+            IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler);
+            
+            // If we've finished or hit an error...
+            if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) {
+                // Report errors.
+                if (d == IterativeParsingErrorState) {
+                    HandleError(state_, is);
+                    return false;
+                }
+            
+                // Transition to the finish state.
+                RAPIDJSON_ASSERT(d == IterativeParsingFinishState);
+                state_ = d;
+                
+                // If StopWhenDone is not set...
+                if (!(parseFlags & kParseStopWhenDoneFlag)) {
+                    // ... and extra non-whitespace data is found...
+                    SkipWhitespaceAndComments<parseFlags>(is);
+                    if (is.Peek() != '\0') {
+                        // ... this is considered an error.
+                        HandleError(state_, is);
+                        return false;
+                    }
+                }
+                
+                // Success! We are done!
+                return true;
+            }
+            
+            // Transition to the new state.
+            state_ = d;
+
+            // If we parsed anything other than a delimiter, we invoked the handler, so we can return true now.
+            if (!IsIterativeParsingDelimiterState(n))
+                return true;
+        }
+        
+        // We reached the end of file.
+        stack_.Clear();
+
+        if (state_ != IterativeParsingFinishState) {
+            HandleError(state_, is);
+            return false;
+        }
+        
+        return true;
+    }
+    
+    //! Check whether token-by-token parsing of the JSON text is complete
+    /*! \return Whether the current state is a "complete" state (appears to cover the finish and error states - confirm against IsIterativeParsingCompleteState()).
+     */
+    RAPIDJSON_FORCEINLINE bool IterativeParseComplete() {
+        return IsIterativeParsingCompleteState(state_);
+    }
+
     //! Whether a parse error has occured in the last parsing.
     bool HasParseError() const { return parseResult_.IsError(); }
 
@@ -878,7 +1042,7 @@ class GenericReader {
                 if (c == '\0')
                     RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
                 else
-                    RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell());
+                    RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell());
             }
             else {
                 size_t offset = is.Tell();
@@ -913,7 +1077,7 @@ class GenericReader {
         // The rest of string using SIMD
         static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
         static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
-        static const char space[16]  = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
+        static const char space[16]  = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
         const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
         const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
         const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
@@ -922,7 +1086,7 @@ class GenericReader {
             const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
             const __m128i t1 = _mm_cmpeq_epi8(s, dq);
             const __m128i t2 = _mm_cmpeq_epi8(s, bs);
-            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
+            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
             const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
             unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
             if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
@@ -934,11 +1098,13 @@ class GenericReader {
     #else
                 length = static_cast<SizeType>(__builtin_ffs(r) - 1);
     #endif
-                char* q = reinterpret_cast<char*>(os.Push(length));
-                for (size_t i = 0; i < length; i++)
-                    q[i] = p[i];
+                if (length != 0) {
+                    char* q = reinterpret_cast<char*>(os.Push(length));
+                    for (size_t i = 0; i < length; i++)
+                        q[i] = p[i];
 
-                p += length;
+                    p += length;
+                }
                 break;
             }
             _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
@@ -974,7 +1140,7 @@ class GenericReader {
         // The rest of string using SIMD
         static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
         static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
-        static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
+        static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
         const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
         const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
         const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
@@ -983,7 +1149,7 @@ class GenericReader {
             const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
             const __m128i t1 = _mm_cmpeq_epi8(s, dq);
             const __m128i t2 = _mm_cmpeq_epi8(s, bs);
-            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
+            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
             const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
             unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
             if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
@@ -1022,7 +1188,7 @@ class GenericReader {
         // The rest of string using SIMD
         static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
         static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
-        static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
+        static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
         const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
         const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
         const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
@@ -1031,7 +1197,7 @@ class GenericReader {
             const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
             const __m128i t1 = _mm_cmpeq_epi8(s, dq);
             const __m128i t2 = _mm_cmpeq_epi8(s, bs);
-            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
+            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
             const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
             unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
             if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
@@ -1050,7 +1216,180 @@ class GenericReader {
 
         is.src_ = is.dst_ = p;
     }
-#endif
+#elif defined(RAPIDJSON_NEON)
+    // StringStream -> StackStream<char>: copies characters to os until '"', '\\' or a byte below 0x20 is met, then stores the stop position in is.src_
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
+        const char* p = is.src_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        while (p != nextAligned)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                is.src_ = p;
+                return;
+            }
+            else
+                os.Put(*p++);
+
+        // The rest of string using SIMD, 16 bytes per iteration
+        const uint8x16_t s0 = vmovq_n_u8('"');
+        const uint8x16_t s1 = vmovq_n_u8('\\');
+        const uint8x16_t s2 = vmovq_n_u8('\b');   // 0x08 is also matched by the "< 32" comparison below
+        const uint8x16_t s3 = vmovq_n_u8(32);
+
+        for (;; p += 16) {
+            const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
+            uint8x16_t x = vceqq_u8(s, s0);
+            x = vorrq_u8(x, vceqq_u8(s, s1));
+            x = vorrq_u8(x, vceqq_u8(s, s2));
+            x = vorrq_u8(x, vcltq_u8(s, s3));
+
+            x = vrev64q_u8(x);                     // Rev in 64, so that (clz >> 3) below is used as the count of bytes before the first match
+            uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract flags of bytes 0..7 (ACLE reinterpret instead of reinterpret_cast)
+            uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract flags of bytes 8..15
+
+            SizeType length = 0;                   // number of bytes of this chunk to copy before the stop character
+            bool escaped = false;                  // true when this chunk contains a stop character
+            if (low == 0) {
+                if (high != 0) {
+                    unsigned lz = static_cast<unsigned>(__builtin_clzll(high));
+                    length = 8 + (lz >> 3);
+                    escaped = true;
+                }
+            } else {
+                unsigned lz = static_cast<unsigned>(__builtin_clzll(low));
+                length = lz >> 3;
+                escaped = true;
+            }
+            if (RAPIDJSON_UNLIKELY(escaped)) {   // a stop character is present in this chunk
+                if (length != 0) {
+                    char* q = reinterpret_cast<char*>(os.Push(length));
+                    for (size_t i = 0; i < length; i++)
+                        q[i] = p[i];
+
+                    p += length;
+                }
+                break;
+            }
+            vst1q_u8(reinterpret_cast<uint8_t *>(os.Push(16)), s);   // whole chunk is clean: copy all 16 bytes
+        }
+
+        is.src_ = p;
+    }
+
+    // InsituStringStream -> InsituStringStream: moves characters from src_ to dst_ until '"', '\\' or a byte below 0x20 is met
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
+        RAPIDJSON_ASSERT(&is == &os);
+        (void)os;
+
+        if (is.src_ == is.dst_) {   // nothing to move when read and write positions coincide
+            SkipUnescapedString(is);
+            return;
+        }
+
+        char* p = is.src_;
+        char *q = is.dst_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        while (p != nextAligned)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                is.src_ = p;
+                is.dst_ = q;
+                return;
+            }
+            else
+                *q++ = *p++;
+
+        // The rest of string using SIMD, 16 bytes per iteration
+        const uint8x16_t s0 = vmovq_n_u8('"');
+        const uint8x16_t s1 = vmovq_n_u8('\\');
+        const uint8x16_t s2 = vmovq_n_u8('\b');   // 0x08 is also matched by the "< 32" comparison below
+        const uint8x16_t s3 = vmovq_n_u8(32);
+
+        for (;; p += 16, q += 16) {
+            const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
+            uint8x16_t x = vceqq_u8(s, s0);
+            x = vorrq_u8(x, vceqq_u8(s, s1));
+            x = vorrq_u8(x, vceqq_u8(s, s2));
+            x = vorrq_u8(x, vcltq_u8(s, s3));
+
+            x = vrev64q_u8(x);                     // Rev in 64, so that (clz >> 3) below is used as the count of bytes before the first match
+            uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract flags of bytes 0..7 (ACLE reinterpret instead of reinterpret_cast)
+            uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract flags of bytes 8..15
+
+            SizeType length = 0;                   // number of bytes of this chunk to move before the stop character
+            bool escaped = false;                  // true when this chunk contains a stop character
+            if (low == 0) {
+                if (high != 0) {
+                    unsigned lz = static_cast<unsigned>(__builtin_clzll(high));
+                    length = 8 + (lz >> 3);
+                    escaped = true;
+                }
+            } else {
+                unsigned lz = static_cast<unsigned>(__builtin_clzll(low));
+                length = lz >> 3;
+                escaped = true;
+            }
+            if (RAPIDJSON_UNLIKELY(escaped)) {   // a stop character is present in this chunk
+                for (const char* pend = p + length; p != pend; ) {
+                    *q++ = *p++;
+                }
+                break;
+            }
+            vst1q_u8(reinterpret_cast<uint8_t *>(q), s);   // whole chunk is clean: move all 16 bytes
+        }
+
+        is.src_ = p;
+        is.dst_ = q;
+    }
+
+    // When read/write pointers are the same for insitu stream, just skip unescaped characters (stops at '"', '\\' or a byte below 0x20)
+    static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
+        RAPIDJSON_ASSERT(is.src_ == is.dst_);
+        char* p = is.src_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        for (; p != nextAligned; p++)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                is.src_ = is.dst_ = p;
+                return;
+            }
+
+        // The rest of string using SIMD, 16 bytes per iteration
+        const uint8x16_t s0 = vmovq_n_u8('"');
+        const uint8x16_t s1 = vmovq_n_u8('\\');
+        const uint8x16_t s2 = vmovq_n_u8('\b');   // 0x08 is also matched by the "< 32" comparison below
+        const uint8x16_t s3 = vmovq_n_u8(32);
+
+        for (;; p += 16) {
+            const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
+            uint8x16_t x = vceqq_u8(s, s0);
+            x = vorrq_u8(x, vceqq_u8(s, s1));
+            x = vorrq_u8(x, vceqq_u8(s, s2));
+            x = vorrq_u8(x, vcltq_u8(s, s3));
+
+            x = vrev64q_u8(x);                     // Rev in 64, so that (clz >> 3) below is used as the count of bytes before the first match
+            uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract flags of bytes 0..7 (ACLE reinterpret instead of reinterpret_cast)
+            uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract flags of bytes 8..15
+
+            if (low == 0) {
+                if (high != 0) {
+                    unsigned lz = static_cast<unsigned>(__builtin_clzll(high));
+                    p += 8 + (lz >> 3);
+                    break;
+                }
+            } else {
+                unsigned lz = static_cast<unsigned>(__builtin_clzll(low));
+                p += lz >> 3;
+                break;
+            }
+        }
+
+        is.src_ = is.dst_ = p;
+    }
+#endif // RAPIDJSON_NEON
 
     template<typename InputStream, bool backup, bool pushOnTake>
     class NumberStream;
@@ -1061,7 +1400,6 @@ class GenericReader {
         typedef typename InputStream::Ch Ch;
 
         NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader;  }
-        ~NumberStream() {}
 
         RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
         RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
@@ -1083,7 +1421,6 @@ class GenericReader {
         typedef NumberStream<InputStream, false, false> Base;
     public:
         NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {}
-        ~NumberStream() {}
 
         RAPIDJSON_FORCEINLINE Ch TakePush() {
             stackStream.Put(static_cast<char>(Base::is.Peek()));
@@ -1110,7 +1447,6 @@ class GenericReader {
         typedef NumberStream<InputStream, true, false> Base;
     public:
         NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {}
-        ~NumberStream() {}
 
         RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
     };
@@ -1171,18 +1507,27 @@ class GenericReader {
         }
         // Parse NaN or Infinity here
         else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) {
-            useNanOrInf = true;
-            if (RAPIDJSON_LIKELY(Consume(s, 'N') && Consume(s, 'a') && Consume(s, 'N'))) {
-                d = std::numeric_limits<double>::quiet_NaN();
+            if (Consume(s, 'N')) {
+                if (Consume(s, 'a') && Consume(s, 'N')) {
+                    d = std::numeric_limits<double>::quiet_NaN();
+                    useNanOrInf = true;
+                }
             }
-            else if (RAPIDJSON_LIKELY(Consume(s, 'I') && Consume(s, 'n') && Consume(s, 'f'))) {
-                d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity());
-                if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n')
-                                                            && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y'))))
-                    RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
+            else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) {
+                if (Consume(s, 'n') && Consume(s, 'f')) {
+                    d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity());
+                    useNanOrInf = true;
+
+                    if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n')
+                                                                && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) {
+                        RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
+                    }
+                }
             }
-            else
+            
+            if (RAPIDJSON_UNLIKELY(!useNanOrInf)) {
                 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
+            }
         }
         else
             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
@@ -1394,30 +1739,32 @@ class GenericReader {
 
     // States
     enum IterativeParsingState {
-        IterativeParsingStartState = 0,
-        IterativeParsingFinishState,
-        IterativeParsingErrorState,
+        IterativeParsingFinishState = 0, // sink states at top, so they have the lowest values
+        IterativeParsingErrorState,      // last sink state: IsIterativeParsingCompleteState() tests s <= this value
+        IterativeParsingStartState,
 
         // Object states
         IterativeParsingObjectInitialState,
         IterativeParsingMemberKeyState,
-        IterativeParsingKeyValueDelimiterState,
         IterativeParsingMemberValueState,
-        IterativeParsingMemberDelimiterState,
         IterativeParsingObjectFinishState,
 
         // Array states
         IterativeParsingArrayInitialState,
         IterativeParsingElementState,
-        IterativeParsingElementDelimiterState,
         IterativeParsingArrayFinishState,
 
         // Single value state
-        IterativeParsingValueState
+        IterativeParsingValueState,
+        
+        // Delimiter states (at bottom): IsIterativeParsingDelimiterState() tests s >= IterativeParsingElementDelimiterState
+        IterativeParsingElementDelimiterState,
+        IterativeParsingMemberDelimiterState,
+        IterativeParsingKeyValueDelimiterState,
+        
+        cIterativeParsingStateCount      // not a state: number of states, used as the first dimension of the table in Predict()
     };
 
-    enum { cIterativeParsingStateCount = IterativeParsingValueState + 1 };
-
     // Tokens
     enum Token {
         LeftBracketToken = 0,
@@ -1468,6 +1815,18 @@ class GenericReader {
     RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) {
         // current state x one lookahead token -> new state
         static const char G[cIterativeParsingStateCount][kTokenCount] = {
+            // Finish(sink state)
+            {
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState
+            },
+            // Error(sink state)
+            {
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState
+            },
             // Start
             {
                 IterativeParsingArrayInitialState,  // Left bracket
@@ -1482,18 +1841,6 @@ class GenericReader {
                 IterativeParsingValueState,         // Null
                 IterativeParsingValueState          // Number
             },
-            // Finish(sink state)
-            {
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState
-            },
-            // Error(sink state)
-            {
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState
-            },
             // ObjectInitial
             {
                 IterativeParsingErrorState,         // Left bracket
@@ -1522,20 +1869,6 @@ class GenericReader {
                 IterativeParsingErrorState,             // Null
                 IterativeParsingErrorState              // Number
             },
-            // KeyValueDelimiter
-            {
-                IterativeParsingArrayInitialState,      // Left bracket(push MemberValue state)
-                IterativeParsingErrorState,             // Right bracket
-                IterativeParsingObjectInitialState,     // Left curly bracket(push MemberValue state)
-                IterativeParsingErrorState,             // Right curly bracket
-                IterativeParsingErrorState,             // Comma
-                IterativeParsingErrorState,             // Colon
-                IterativeParsingMemberValueState,       // String
-                IterativeParsingMemberValueState,       // False
-                IterativeParsingMemberValueState,       // True
-                IterativeParsingMemberValueState,       // Null
-                IterativeParsingMemberValueState        // Number
-            },
             // MemberValue
             {
                 IterativeParsingErrorState,             // Left bracket
@@ -1550,20 +1883,6 @@ class GenericReader {
                 IterativeParsingErrorState,             // Null
                 IterativeParsingErrorState              // Number
             },
-            // MemberDelimiter
-            {
-                IterativeParsingErrorState,         // Left bracket
-                IterativeParsingErrorState,         // Right bracket
-                IterativeParsingErrorState,         // Left curly bracket
-                IterativeParsingObjectFinishState,  // Right curly bracket
-                IterativeParsingErrorState,         // Comma
-                IterativeParsingErrorState,         // Colon
-                IterativeParsingMemberKeyState,     // String
-                IterativeParsingErrorState,         // False
-                IterativeParsingErrorState,         // True
-                IterativeParsingErrorState,         // Null
-                IterativeParsingErrorState          // Number
-            },
             // ObjectFinish(sink state)
             {
                 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
@@ -1598,6 +1917,18 @@ class GenericReader {
                 IterativeParsingErrorState,             // Null
                 IterativeParsingErrorState              // Number
             },
+            // ArrayFinish(sink state)
+            {
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState
+            },
+            // Single Value (sink state)
+            {
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState
+            },
             // ElementDelimiter
             {
                 IterativeParsingArrayInitialState,      // Left bracket(push Element state)
@@ -1612,18 +1943,34 @@ class GenericReader {
                 IterativeParsingElementState,           // Null
                 IterativeParsingElementState            // Number
             },
-            // ArrayFinish(sink state)
+            // MemberDelimiter
             {
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState
+                IterativeParsingErrorState,         // Left bracket
+                IterativeParsingErrorState,         // Right bracket
+                IterativeParsingErrorState,         // Left curly bracket
+                IterativeParsingObjectFinishState,  // Right curly bracket
+                IterativeParsingErrorState,         // Comma
+                IterativeParsingErrorState,         // Colon
+                IterativeParsingMemberKeyState,     // String
+                IterativeParsingErrorState,         // False
+                IterativeParsingErrorState,         // True
+                IterativeParsingErrorState,         // Null
+                IterativeParsingErrorState          // Number
             },
-            // Single Value (sink state)
+            // KeyValueDelimiter
             {
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState
-            }
+                IterativeParsingArrayInitialState,      // Left bracket(push MemberValue state)
+                IterativeParsingErrorState,             // Right bracket
+                IterativeParsingObjectInitialState,     // Left curly bracket(push MemberValue state)
+                IterativeParsingErrorState,             // Right curly bracket
+                IterativeParsingErrorState,             // Comma
+                IterativeParsingErrorState,             // Colon
+                IterativeParsingMemberValueState,       // String
+                IterativeParsingMemberValueState,       // False
+                IterativeParsingMemberValueState,       // True
+                IterativeParsingMemberValueState,       // Null
+                IterativeParsingMemberValueState        // Number
+            },
         }; // End of G
 
         return static_cast<IterativeParsingState>(G[state][token]);
@@ -1804,44 +2151,57 @@ class GenericReader {
         }
     }
 
+    //! Tells whether \c s is one of the delimiter states, which are the last states listed in IterativeParsingState.
+    RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) const {
+        return s >= IterativeParsingElementDelimiterState;
+    }
+
+    //! Tells whether \c s is a sink state (Finish or Error), which are the first states listed in IterativeParsingState.
+    RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) const {
+        return s <= IterativeParsingErrorState;
+    }
+
+    //! Runs the iterative parser: each lookahead character goes through Tokenize(), Predict() and Transit() until the input
+    //! ends, the error state is reached, or (with kParseStopWhenDoneFlag) the finish state is reached. Returns parseResult_.
     template <unsigned parseFlags, typename InputStream, typename Handler>
     ParseResult IterativeParse(InputStream& is, Handler& handler) {
         parseResult_.Clear();
         ClearStackOnExit scope(*this);
         IterativeParsingState state = IterativeParsingStartState;
 
         SkipWhitespaceAndComments<parseFlags>(is);
         RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
         while (is.Peek() != '\0') {
             Token t = Tokenize(is.Peek());
             IterativeParsingState n = Predict(state, t);
             IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
 
             if (d == IterativeParsingErrorState) {
                 HandleError(state, is);
                 break;
             }
 
             state = d;
 
             // Do not further consume streams if a root JSON has been parsed.
             if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
                 break;
 
             SkipWhitespaceAndComments<parseFlags>(is);
             RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
         }
 
         // Handle the end of file.
         if (state != IterativeParsingFinishState)
             HandleError(state, is);
 
         return parseResult_;
     }
 
     static const size_t kDefaultStackCapacity = 256;    //!< Default stack capacity in bytes for storing a single decoded string.
     internal::Stack<StackAllocator> stack_;  //!< A stack for storing decoded string temporarily during non-destructive parsing.
     ParseResult parseResult_;
+    IterativeParsingState state_;
 }; // class GenericReader
 
 //! Reader with UTF8 encoding and default allocator.
diff --git a/include/rapidjson/schema.h b/include/rapidjson/schema.h
index 288b93d0f..abcf1a102 100644
--- a/include/rapidjson/schema.h
+++ b/include/rapidjson/schema.h
@@ -349,6 +349,7 @@ class Schema {
 
     Schema(SchemaDocumentType* schemaDocument, const PointerType& p, const ValueType& value, const ValueType& document, AllocatorType* allocator) :
         allocator_(allocator),
+        typeless_(schemaDocument->GetTypeless()),
         enum_(),
         enumCount_(),
         not_(),
@@ -453,7 +454,7 @@ class Schema {
                 for (SizeType i = 0; i < propertyCount_; i++) {
                     new (&properties_[i]) Property();
                     properties_[i].name = allProperties[i];
-                    properties_[i].schema = GetTypeless();
+                    properties_[i].schema = typeless_;
                 }
             }
         }
@@ -575,9 +576,7 @@ class Schema {
     }
 
     ~Schema() {
-        if (allocator_) {
-            allocator_->Free(enum_);
-        }
+        AllocatorType::Free(enum_);
         if (properties_) {
             for (SizeType i = 0; i < propertyCount_; i++)
                 properties_[i].~Property();
@@ -592,7 +591,7 @@ class Schema {
 #if RAPIDJSON_SCHEMA_HAS_REGEX
         if (pattern_) {
             pattern_->~RegexType();
-            allocator_->Free(pattern_);
+            AllocatorType::Free(pattern_);
         }
 #endif
     }
@@ -610,12 +609,12 @@ class Schema {
                 else if (additionalItemsSchema_)
                     context.valueSchema = additionalItemsSchema_;
                 else if (additionalItems_)
-                    context.valueSchema = GetTypeless();
+                    context.valueSchema = typeless_;
                 else
                     RAPIDJSON_INVALID_KEYWORD_RETURN(GetItemsString());
             }
             else
-                context.valueSchema = GetTypeless();
+                context.valueSchema = typeless_;
 
             context.arrayElementIndex++;
         }
@@ -784,15 +783,17 @@ class Schema {
         if (patternProperties_) {
             context.patternPropertiesSchemaCount = 0;
             for (SizeType i = 0; i < patternPropertyCount_; i++)
-                if (patternProperties_[i].pattern && IsPatternMatch(patternProperties_[i].pattern, str, len))
+                if (patternProperties_[i].pattern && IsPatternMatch(patternProperties_[i].pattern, str, len)) {
                     context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = patternProperties_[i].schema;
+                    context.valueSchema = typeless_;
+                }
         }
 
         SizeType index;
         if (FindPropertyIndex(ValueType(str, len).Move(), &index)) {
             if (context.patternPropertiesSchemaCount > 0) {
                 context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = properties_[index].schema;
-                context.valueSchema = GetTypeless();
+                context.valueSchema = typeless_;
                 context.valuePatternValidatorType = Context::kPatternValidatorWithProperty;
             }
             else
@@ -807,7 +808,7 @@ class Schema {
         if (additionalPropertiesSchema_) {
             if (additionalPropertiesSchema_ && context.patternPropertiesSchemaCount > 0) {
                 context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = additionalPropertiesSchema_;
-                context.valueSchema = GetTypeless();
+                context.valueSchema = typeless_;
                 context.valuePatternValidatorType = Context::kPatternValidatorWithAdditionalProperty;
             }
             else
@@ -815,7 +816,7 @@ class Schema {
             return true;
         }
         else if (additionalProperties_) {
-            context.valueSchema = GetTypeless();
+            context.valueSchema = typeless_;
             return true;
         }
 
@@ -881,7 +882,7 @@ class Schema {
 #define RAPIDJSON_STRING_(name, ...) \
     static const ValueType& Get##name##String() {\
         static const Ch s[] = { __VA_ARGS__, '\0' };\
-        static const ValueType v(s, sizeof(s) / sizeof(Ch) - 1);\
+        static const ValueType v(s, static_cast<SizeType>(sizeof(s) / sizeof(Ch) - 1));\
         return v;\
     }
 
@@ -949,11 +950,6 @@ class Schema {
         SizeType count;
     };
 
-    static const SchemaType* GetTypeless() {
-        static SchemaType typeless(0, PointerType(), ValueType(kObjectType).Move(), ValueType(kObjectType).Move(), 0);
-        return &typeless;
-    }
-
     template <typename V1, typename V2>
     void AddUniqueElement(V1& a, const V2& v) {
         for (typename V1::ConstValueIterator itr = a.Begin(); itr != a.End(); ++itr)
@@ -1118,8 +1114,8 @@ class Schema {
                 if (exclusiveMaximum_ ? i >= maximum_.GetInt64() : i > maximum_.GetInt64())
                     RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString());
             }
-            else if (maximum_.IsUint64())
-                /* do nothing */; // i <= max(int64_t) < maximum_.GetUint64()
+            else if (maximum_.IsUint64()) { }
+                /* do nothing */ // i <= max(int64_t) < maximum_.GetUint64()
             else if (!CheckDoubleMaximum(context, static_cast<double>(i)))
                 return false;
         }
@@ -1219,6 +1215,7 @@ class Schema {
     };
 
     AllocatorType* allocator_;
+    const SchemaType* typeless_;
     uint64_t* enum_;
     SizeType enumCount_;
     SchemaArray allOf_;
@@ -1268,7 +1265,7 @@ struct TokenHelper {
         char buffer[21];
         size_t length = static_cast<size_t>((sizeof(SizeType) == 4 ? u32toa(index, buffer) : u64toa(index, buffer)) - buffer);
         for (size_t i = 0; i < length; i++)
-            *documentStack.template Push<Ch>() = buffer[i];
+            *documentStack.template Push<Ch>() = static_cast<Ch>(buffer[i]);
     }
 };
 
@@ -1344,11 +1341,15 @@ class GenericSchemaDocument {
         allocator_(allocator),
         ownAllocator_(),
         root_(),
+        typeless_(),
         schemaMap_(allocator, kInitialSchemaMapSize),
         schemaRef_(allocator, kInitialSchemaRefSize)
     {
         if (!allocator_)
-            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator());
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
+
+        typeless_ = static_cast<SchemaType*>(allocator_->Malloc(sizeof(SchemaType)));
+        new (typeless_) SchemaType(this, PointerType(), ValueType(kObjectType).Move(), ValueType(kObjectType).Move(), 0);
 
         // Generate root schema, it will call CreateSchema() to create sub-schemas,
         // And call AddRefSchema() if there are $ref.
@@ -1366,6 +1367,9 @@ class GenericSchemaDocument {
                     new (schemaMap_.template Push<SchemaEntry>()) SchemaEntry(refEntry->source, const_cast<SchemaType*>(s), false, allocator_);
                 }
             }
+            else if (refEntry->schema)
+                *refEntry->schema = typeless_;
+
             refEntry->~SchemaRefEntry();
         }
 
@@ -1381,12 +1385,14 @@ class GenericSchemaDocument {
         allocator_(rhs.allocator_),
         ownAllocator_(rhs.ownAllocator_),
         root_(rhs.root_),
+        typeless_(rhs.typeless_),
         schemaMap_(std::move(rhs.schemaMap_)),
         schemaRef_(std::move(rhs.schemaRef_))
     {
         rhs.remoteProvider_ = 0;
         rhs.allocator_ = 0;
         rhs.ownAllocator_ = 0;
+        rhs.typeless_ = 0;
     }
 #endif
 
@@ -1395,6 +1401,11 @@ class GenericSchemaDocument {
         while (!schemaMap_.Empty())
             schemaMap_.template Pop<SchemaEntry>(1)->~SchemaEntry();
 
+        if (typeless_) {
+            typeless_->~SchemaType();
+            Allocator::Free(typeless_);
+        }
+
         RAPIDJSON_DELETE(ownAllocator_);
     }
 
@@ -1429,7 +1440,7 @@ class GenericSchemaDocument {
 
     void CreateSchemaRecursive(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document) {
         if (schema)
-            *schema = SchemaType::GetTypeless();
+            *schema = typeless_;
 
         if (v.GetType() == kObjectType) {
             const SchemaType* s = GetSchema(pointer);
@@ -1516,6 +1527,8 @@ class GenericSchemaDocument {
         return PointerType();
     }
 
+    const SchemaType* GetTypeless() const { return typeless_; }
+
     static const size_t kInitialSchemaMapSize = 64;
     static const size_t kInitialSchemaRefSize = 64;
 
@@ -1523,6 +1536,7 @@ class GenericSchemaDocument {
     Allocator *allocator_;
     Allocator *ownAllocator_;
     const SchemaType* root_;                //!< Root schema.
+    SchemaType* typeless_;
     internal::Stack<Allocator> schemaMap_;  // Stores created Pointer -> Schemas
     internal::Stack<Allocator> schemaRef_;  // Stores Pointer from $ref and schema which holds the $ref
 };
@@ -1576,11 +1590,11 @@ class GenericSchemaValidator :
         :
         schemaDocument_(&schemaDocument),
         root_(schemaDocument.GetRoot()),
-        outputHandler_(GetNullHandler()),
         stateAllocator_(allocator),
         ownStateAllocator_(0),
         schemaStack_(allocator, schemaStackCapacity),
         documentStack_(allocator, documentStackCapacity),
+        outputHandler_(0),
         valid_(true)
 #if RAPIDJSON_SCHEMA_VERBOSE
         , depth_(0)
@@ -1604,11 +1618,11 @@ class GenericSchemaValidator :
         :
         schemaDocument_(&schemaDocument),
         root_(schemaDocument.GetRoot()),
-        outputHandler_(outputHandler),
         stateAllocator_(allocator),
         ownStateAllocator_(0),
         schemaStack_(allocator, schemaStackCapacity),
         documentStack_(allocator, documentStackCapacity),
+        outputHandler_(&outputHandler),
         valid_(true)
 #if RAPIDJSON_SCHEMA_VERBOSE
         , depth_(0)
@@ -1680,7 +1694,7 @@ RAPIDJSON_MULTILINEMACRO_END
     }
 
 #define RAPIDJSON_SCHEMA_HANDLE_END_(method, arg2)\
-    return valid_ = EndValue() && outputHandler_.method arg2
+    return valid_ = EndValue() && (!outputHandler_ || outputHandler_->method arg2)
 
 #define RAPIDJSON_SCHEMA_HANDLE_VALUE_(method, arg1, arg2) \
     RAPIDJSON_SCHEMA_HANDLE_BEGIN_   (method, arg1);\
@@ -1702,7 +1716,7 @@ RAPIDJSON_MULTILINEMACRO_END
     bool StartObject() {
         RAPIDJSON_SCHEMA_HANDLE_BEGIN_(StartObject, (CurrentContext()));
         RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(StartObject, ());
-        return valid_ = outputHandler_.StartObject();
+        return valid_ = !outputHandler_ || outputHandler_->StartObject();
     }
     
     bool Key(const Ch* str, SizeType len, bool copy) {
@@ -1710,7 +1724,7 @@ RAPIDJSON_MULTILINEMACRO_END
         AppendToken(str, len);
         if (!CurrentSchema().Key(CurrentContext(), str, len, copy)) return valid_ = false;
         RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(Key, (str, len, copy));
-        return valid_ = outputHandler_.Key(str, len, copy);
+        return valid_ = !outputHandler_ || outputHandler_->Key(str, len, copy);
     }
     
     bool EndObject(SizeType memberCount) { 
@@ -1723,7 +1737,7 @@ RAPIDJSON_MULTILINEMACRO_END
     bool StartArray() {
         RAPIDJSON_SCHEMA_HANDLE_BEGIN_(StartArray, (CurrentContext()));
         RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(StartArray, ());
-        return valid_ = outputHandler_.StartArray();
+        return valid_ = !outputHandler_ || outputHandler_->StartArray();
     }
     
     bool EndArray(SizeType elementCount) {
@@ -1772,7 +1786,7 @@ RAPIDJSON_MULTILINEMACRO_END
     }
 
     virtual void FreeState(void* p) {
-        return StateAllocator::Free(p);
+        StateAllocator::Free(p);
     }
 
 private:
@@ -1792,11 +1806,11 @@ RAPIDJSON_MULTILINEMACRO_END
         :
         schemaDocument_(&schemaDocument),
         root_(root),
-        outputHandler_(GetNullHandler()),
         stateAllocator_(allocator),
         ownStateAllocator_(0),
         schemaStack_(allocator, schemaStackCapacity),
         documentStack_(allocator, documentStackCapacity),
+        outputHandler_(0),
         valid_(true)
 #if RAPIDJSON_SCHEMA_VERBOSE
         , depth_(depth)
@@ -1806,7 +1820,7 @@ RAPIDJSON_MULTILINEMACRO_END
 
     StateAllocator& GetStateAllocator() {
         if (!stateAllocator_)
-            stateAllocator_ = ownStateAllocator_ = RAPIDJSON_NEW(StateAllocator());
+            stateAllocator_ = ownStateAllocator_ = RAPIDJSON_NEW(StateAllocator)();
         return *stateAllocator_;
     }
 
@@ -1824,8 +1838,8 @@ RAPIDJSON_MULTILINEMACRO_END
             const SchemaType** sa = CurrentContext().patternPropertiesSchemas;
             typename Context::PatternValidatorType patternValidatorType = CurrentContext().valuePatternValidatorType;
             bool valueUniqueness = CurrentContext().valueUniqueness;
-            if (CurrentContext().valueSchema)
-                PushSchema(*CurrentContext().valueSchema);
+            RAPIDJSON_ASSERT(CurrentContext().valueSchema);
+            PushSchema(*CurrentContext().valueSchema);
 
             if (count > 0) {
                 CurrentContext().objectPatternValidatorType = patternValidatorType;
@@ -1910,20 +1924,15 @@ RAPIDJSON_MULTILINEMACRO_END
     Context& CurrentContext() { return *schemaStack_.template Top<Context>(); }
     const Context& CurrentContext() const { return *schemaStack_.template Top<Context>(); }
 
-    static OutputHandler& GetNullHandler() {
-        static OutputHandler nullHandler;
-        return nullHandler;
-    }
-
     static const size_t kDefaultSchemaStackCapacity = 1024;
     static const size_t kDefaultDocumentStackCapacity = 256;
     const SchemaDocumentType* schemaDocument_;
     const SchemaType& root_;
-    OutputHandler& outputHandler_;
     StateAllocator* stateAllocator_;
     StateAllocator* ownStateAllocator_;
     internal::Stack<StateAllocator> schemaStack_;    //!< stack to store the current path of schema (BaseSchemaType *)
     internal::Stack<StateAllocator> documentStack_;  //!< stack to store the current path of validating document (Ch)
+    OutputHandler* outputHandler_;
     bool valid_;
 #if RAPIDJSON_SCHEMA_VERBOSE
     unsigned depth_;
diff --git a/include/rapidjson/writer.h b/include/rapidjson/writer.h
index 8f6e174f3..e610ebb60 100644
--- a/include/rapidjson/writer.h
+++ b/include/rapidjson/writer.h
@@ -16,6 +16,7 @@
 #define RAPIDJSON_WRITER_H_
 
 #include "stream.h"
+#include "internal/meta.h"
 #include "internal/stack.h"
 #include "internal/strfunc.h"
 #include "internal/dtoa.h"
@@ -31,6 +32,8 @@
 #include <nmmintrin.h>
 #elif defined(RAPIDJSON_SSE2)
 #include <emmintrin.h>
+#elif defined(RAPIDJSON_NEON)
+#include <arm_neon.h>
 #endif
 
 #ifdef _MSC_VER
@@ -219,10 +222,18 @@ class Writer {
 
     bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); }
 
+#if RAPIDJSON_HAS_STDSTRING
+    //! Overload taking a std::basic_string; forwards str.data() and str.size() to Key(const Ch*, SizeType, bool).
+    bool Key(const std::basic_string<Ch>& str) {
+        return Key(str.data(), static_cast<SizeType>(str.size()));
+    }
+#endif
+	
     bool EndObject(SizeType memberCount = 0) {
         (void)memberCount;
-        RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level));
-        RAPIDJSON_ASSERT(!level_stack_.template Top<Level>()->inArray);
+        RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); // not inside an Object
+        RAPIDJSON_ASSERT(!level_stack_.template Top<Level>()->inArray); // currently inside an Array, not Object
+        RAPIDJSON_ASSERT(0 == level_stack_.template Top<Level>()->valueCount % 2); // Object has a Key without a Value
         level_stack_.template Pop<Level>(1);
         return EndValue(WriteEndObject());
     }
@@ -246,9 +257,9 @@ class Writer {
     //@{
 
     //! Simpler but slower overload.
-    bool String(const Ch* str) { return String(str, internal::StrLen(str)); }
-    bool Key(const Ch* str) { return Key(str, internal::StrLen(str)); }
-
+    bool String(const Ch* const& str) { return String(str, internal::StrLen(str)); }
+    bool Key(const Ch* const& str) { return Key(str, internal::StrLen(str)); }
+    
     //@}
 
     //! Write a raw JSON value.
@@ -265,6 +276,14 @@ class Writer {
         return EndValue(WriteRawValue(json, length));
     }
 
+    //! Flush the output stream.
+    /*!
+        Calls Flush() on the underlying output stream on demand; EndValue() otherwise flushes only at the end of the JSON text.
+     */
+    void Flush() {
+        os_->Flush();
+    }
+
 protected:
     //! Information for each nested level
     struct Level {
@@ -297,7 +316,7 @@ class Writer {
         const char* end = internal::i32toa(i, buffer);
         PutReserve(*os_, static_cast<size_t>(end - buffer));
         for (const char* p = buffer; p != end; ++p)
-            PutUnsafe(*os_, static_cast<typename TargetEncoding::Ch>(*p));
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
         return true;
     }
 
@@ -306,7 +325,7 @@ class Writer {
         const char* end = internal::u32toa(u, buffer);
         PutReserve(*os_, static_cast<size_t>(end - buffer));
         for (const char* p = buffer; p != end; ++p)
-            PutUnsafe(*os_, static_cast<typename TargetEncoding::Ch>(*p));
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
         return true;
     }
 
@@ -315,7 +334,7 @@ class Writer {
         const char* end = internal::i64toa(i64, buffer);
         PutReserve(*os_, static_cast<size_t>(end - buffer));
         for (const char* p = buffer; p != end; ++p)
-            PutUnsafe(*os_, static_cast<typename TargetEncoding::Ch>(*p));
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
         return true;
     }
 
@@ -324,7 +343,7 @@ class Writer {
         char* end = internal::u64toa(u64, buffer);
         PutReserve(*os_, static_cast<size_t>(end - buffer));
         for (char* p = buffer; p != end; ++p)
-            PutUnsafe(*os_, static_cast<typename TargetEncoding::Ch>(*p));
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
         return true;
     }
 
@@ -352,12 +371,12 @@ class Writer {
         char* end = internal::dtoa(d, buffer, maxDecimalPlaces_);
         PutReserve(*os_, static_cast<size_t>(end - buffer));
         for (char* p = buffer; p != end; ++p)
-            PutUnsafe(*os_, static_cast<typename TargetEncoding::Ch>(*p));
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
         return true;
     }
 
     bool WriteString(const Ch* str, SizeType length)  {
-        static const typename TargetEncoding::Ch hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+        static const typename OutputStream::Ch hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
         static const char escape[256] = {
 #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
             //0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
@@ -413,7 +432,7 @@ class Writer {
             else if ((sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256) && RAPIDJSON_UNLIKELY(escape[static_cast<unsigned char>(c)]))  {
                 is.Take();
                 PutUnsafe(*os_, '\\');
-                PutUnsafe(*os_, static_cast<typename TargetEncoding::Ch>(escape[static_cast<unsigned char>(c)]));
+                PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(escape[static_cast<unsigned char>(c)]));
                 if (escape[static_cast<unsigned char>(c)] == 'u') {
                     PutUnsafe(*os_, '0');
                     PutUnsafe(*os_, '0');
@@ -471,7 +490,7 @@ class Writer {
     // Flush the value if it is the top level one.
     bool EndValue(bool ret) {
         if (RAPIDJSON_UNLIKELY(level_stack_.Empty()))   // end of json text
-            os_->Flush();
+            Flush();
         return ret;
     }
 
@@ -575,7 +594,7 @@ inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, siz
     // The rest of string using SIMD
     static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
     static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
-    static const char space[16]  = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
+    static const char space[16]  = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
     const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
     const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
     const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
@@ -584,7 +603,7 @@ inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, siz
         const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
         const __m128i t1 = _mm_cmpeq_epi8(s, dq);
         const __m128i t2 = _mm_cmpeq_epi8(s, bs);
-        const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
+        const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
         const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
         unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
         if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
@@ -609,7 +628,75 @@ inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, siz
     is.src_ = p;
     return RAPIDJSON_LIKELY(is.Tell() < length);
 }
-#endif // defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
+#elif defined(RAPIDJSON_NEON)
+// NEON path: copies the leading run of characters that need no escaping into os_,
+// stopping at the first '"', '\\' or byte below 0x20 and leaving is.src_ there.
+template<>
+inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, size_t length) {
+    if (length < 16)
+        return RAPIDJSON_LIKELY(is.Tell() < length);
+
+    if (!RAPIDJSON_LIKELY(is.Tell() < length))
+        return false;
+
+    const char* p = is.src_;
+    const char* end = is.head_ + length;
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+    const char* endAligned = reinterpret_cast<const char*>(reinterpret_cast<size_t>(end) & static_cast<size_t>(~15));
+    if (nextAligned > end)
+        return true;
+    // Scalar scan up to the next 16-byte boundary
+    while (p != nextAligned)
+        if (*p < 0x20 || *p == '\"' || *p == '\\') {
+            is.src_ = p;
+            return RAPIDJSON_LIKELY(is.Tell() < length);
+        }
+        else
+            os_->PutUnsafe(*p++);
+
+    // The rest of string using SIMD
+    const uint8x16_t s0 = vmovq_n_u8('"');
+    const uint8x16_t s1 = vmovq_n_u8('\\');
+    const uint8x16_t s3 = vmovq_n_u8(32);   // "< 32" also covers '\b', so no separate test for it
+
+    for (; p != endAligned; p += 16) {
+        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
+        uint8x16_t x = vceqq_u8(s, s0);
+        x = vorrq_u8(x, vceqq_u8(s, s1));
+        x = vorrq_u8(x, vcltq_u8(s, s3));
+        // Reverse the bytes of each 64-bit half so that clz / 8 counts the leading clean bytes
+        x = vrev64q_u8(x);
+        uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
+        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
+
+        SizeType len = 0;
+        bool escaped = false;
+        if (low == 0) {
+            if (high != 0) {
+                unsigned lz = static_cast<unsigned>(__builtin_clzll(high));
+                len = 8 + (lz >> 3);
+                escaped = true;
+            }
+        } else {
+            unsigned lz = static_cast<unsigned>(__builtin_clzll(low));
+            len = lz >> 3;
+            escaped = true;
+        }
+        if (RAPIDJSON_UNLIKELY(escaped)) {   // some of characters is escaped
+            char* q = reinterpret_cast<char*>(os_->PushUnsafe(len));
+            for (size_t i = 0; i < len; i++)
+                q[i] = p[i];
+
+            p += len;
+            break;
+        }
+        vst1q_u8(reinterpret_cast<uint8_t *>(os_->PushUnsafe(16)), s);
+    }
+
+    is.src_ = p;
+    return RAPIDJSON_LIKELY(is.Tell() < length);
+}
+#endif // RAPIDJSON_NEON
 
 RAPIDJSON_NAMESPACE_END
 
diff --git a/library.json b/library.json
index 47fd352ac..21d6bcecf 100644
--- a/library.json
+++ b/library.json
@@ -1,8 +1,11 @@
 {
   "name": "RapidJSON",
+  "version": "1.1.0",
   "keywords": "json, sax, dom, parser, generator",
   "description": "A fast JSON parser/generator for C++ with both SAX/DOM style API",
-  "include": "include",
+  "export": {
+    "include": "include"
+  },
   "examples": "example/*/*.cpp",
   "repository":
   {
diff --git a/rapidjson.autopkg b/rapidjson.autopkg
index 70eb0d8a0..486ad1434 100644
--- a/rapidjson.autopkg
+++ b/rapidjson.autopkg
@@ -71,5 +71,7 @@ Changed
 	targets {
 		// We're trying to be standard about these sorts of thing. (Will help with config.h later :D)
 		//Defines += HAS_EQCORE;
+		// Fix creating the package with Raggles' fork of CoApp
+		Includes += "$(MSBuildThisFileDirectory)../..${d_include}";
 	};
 }
\ No newline at end of file
diff --git a/readme.md b/readme.md
index 4a1d64d0a..293761990 100644
--- a/readme.md
+++ b/readme.md
@@ -1,8 +1,8 @@
-![](doc/logo/rapidjson.png)
+![RapidJSON logo](doc/logo/rapidjson.png)
 
-![](https://img.shields.io/badge/release-v1.1.0-blue.png)
+![Release version](https://img.shields.io/badge/release-v1.1.0-blue.svg)
 
-## A fast JSON parser/generator for C++ with both SAX/DOM style API 
+## A fast JSON parser/generator for C++ with both SAX/DOM style API
 
 Tencent is pleased to support the open source community by making RapidJSON available.
 
@@ -20,12 +20,12 @@ Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights
 | :---------------: | :-----------------: | :-------------------: |
 | ![lin-badge]      | ![win-badge]        | ![cov-badge]          |
 
-[lin-badge]: https://travis-ci.org/miloyip/rapidjson.png?branch=master "Travis build status"
+[lin-badge]: https://travis-ci.org/miloyip/rapidjson.svg?branch=master "Travis build status"
 [lin-link]:  https://travis-ci.org/miloyip/rapidjson "Travis build status"
-[win-badge]: https://ci.appveyor.com/api/projects/status/u658dcuwxo14a8m9/branch/master "AppVeyor build status"
+[win-badge]: https://ci.appveyor.com/api/projects/status/github/miloyip/rapidjson?branch=master&svg=true "AppVeyor build status"
 [win-link]:  https://ci.appveyor.com/project/miloyip/rapidjson/branch/master "AppVeyor build status"
-[cov-badge]: https://coveralls.io/repos/miloyip/rapidjson/badge.png?branch=master
-[cov-link]:  https://coveralls.io/r/miloyip/rapidjson?branch=master
+[cov-badge]: https://coveralls.io/repos/miloyip/rapidjson/badge.svg?branch=master "Coveralls coverage"
+[cov-link]:  https://coveralls.io/r/miloyip/rapidjson?branch=master "Coveralls coverage"
 
 ## Introduction
 
@@ -45,8 +45,8 @@ More features can be read [here](doc/features.md).
 
 JSON(JavaScript Object Notation) is a light-weight data exchange format. RapidJSON should be in fully compliance with RFC7159/ECMA-404, with optional support of relaxed syntax. More information about JSON can be obtained at
 * [Introducing JSON](http://json.org/)
-* [RFC7159: The JavaScript Object Notation (JSON) Data Interchange Format](http://www.ietf.org/rfc/rfc7159.txt)
-* [Standard ECMA-404: The JSON Data Interchange Format](http://www.ecma-international.org/publications/standards/Ecma-404.htm)
+* [RFC7159: The JavaScript Object Notation (JSON) Data Interchange Format](https://tools.ietf.org/html/rfc7159)
+* [Standard ECMA-404: The JSON Data Interchange Format](https://www.ecma-international.org/publications/standards/Ecma-404.htm)
 
 ## Highlights in v1.1 (2016-8-25)
 
@@ -74,8 +74,8 @@ RapidJSON is a header-only C++ library. Just copy the `include/rapidjson` folder
 
 RapidJSON uses following software as its dependencies:
 * [CMake](https://cmake.org/) as a general build tool
-* (optional)[Doxygen](http://www.doxygen.org) to build documentation
-* (optional)[googletest](https://github.com/google/googletest) for unit and performance testing
+* (optional) [Doxygen](http://www.doxygen.org) to build documentation
+* (optional) [googletest](https://github.com/google/googletest) for unit and performance testing
 
 To generate user documentation and run tests please proceed with the steps below:
 
@@ -84,7 +84,7 @@ To generate user documentation and run tests please proceed with the steps below
 3. Change to `build` directory and run `cmake ..` command to configure your build. Windows users can do the same with cmake-gui application.
 4. On Windows, build the solution found in the build directory. On Linux, run `make` from the build directory.
 
-On successfull build you will find compiled test and example binaries in `bin`
+On successful build you will find compiled test and example binaries in `bin`
 directory. The generated documentation will be available in `doc/html`
 directory of the build tree. To run tests after finished build please run `make
 test` or `ctest` from your build tree. You can get detailed output using `ctest
@@ -139,22 +139,22 @@ The following diagram shows the process.
 More [examples](https://github.com/miloyip/rapidjson/tree/master/example) are available:
 
 * DOM API
- * [tutorial](https://github.com/miloyip/rapidjson/blob/master/example/tutorial/tutorial.cpp): Basic usage of DOM API.
+  * [tutorial](https://github.com/miloyip/rapidjson/blob/master/example/tutorial/tutorial.cpp): Basic usage of DOM API.
 
 * SAX API
- * [simplereader](https://github.com/miloyip/rapidjson/blob/master/example/simplereader/simplereader.cpp): Dumps all SAX events while parsing a JSON by `Reader`.
- * [condense](https://github.com/miloyip/rapidjson/blob/master/example/condense/condense.cpp): A command line tool to rewrite a JSON, with all whitespaces removed.
- * [pretty](https://github.com/miloyip/rapidjson/blob/master/example/pretty/pretty.cpp): A command line tool to rewrite a JSON with indents and newlines by `PrettyWriter`.
- * [capitalize](https://github.com/miloyip/rapidjson/blob/master/example/capitalize/capitalize.cpp): A command line tool to capitalize strings in JSON.
- * [messagereader](https://github.com/miloyip/rapidjson/blob/master/example/messagereader/messagereader.cpp): Parse a JSON message with SAX API.
- * [serialize](https://github.com/miloyip/rapidjson/blob/master/example/serialize/serialize.cpp): Serialize a C++ object into JSON with SAX API.
- * [jsonx](https://github.com/miloyip/rapidjson/blob/master/example/jsonx/jsonx.cpp): Implements a `JsonxWriter` which stringify SAX events into [JSONx](https://www-01.ibm.com/support/knowledgecenter/SS9H2Y_7.1.0/com.ibm.dp.doc/json_jsonx.html) (a kind of XML) format. The example is a command line tool which converts input JSON into JSONx format.
+  * [simplereader](https://github.com/miloyip/rapidjson/blob/master/example/simplereader/simplereader.cpp): Dumps all SAX events while parsing a JSON by `Reader`.
+  * [condense](https://github.com/miloyip/rapidjson/blob/master/example/condense/condense.cpp): A command line tool to rewrite a JSON, with all whitespaces removed.
+  * [pretty](https://github.com/miloyip/rapidjson/blob/master/example/pretty/pretty.cpp): A command line tool to rewrite a JSON with indents and newlines by `PrettyWriter`.
+  * [capitalize](https://github.com/miloyip/rapidjson/blob/master/example/capitalize/capitalize.cpp): A command line tool to capitalize strings in JSON.
+  * [messagereader](https://github.com/miloyip/rapidjson/blob/master/example/messagereader/messagereader.cpp): Parse a JSON message with SAX API.
+  * [serialize](https://github.com/miloyip/rapidjson/blob/master/example/serialize/serialize.cpp): Serialize a C++ object into JSON with SAX API.
+  * [jsonx](https://github.com/miloyip/rapidjson/blob/master/example/jsonx/jsonx.cpp): Implements a `JsonxWriter` which stringifies SAX events into [JSONx](https://www-01.ibm.com/support/knowledgecenter/SS9H2Y_7.1.0/com.ibm.dp.doc/json_jsonx.html) (a kind of XML) format. The example is a command line tool which converts input JSON into JSONx format.
 
 * Schema
- * [schemavalidator](https://github.com/miloyip/rapidjson/blob/master/example/schemavalidator/schemavalidator.cpp) : A command line tool to validate a JSON with a JSON schema.
- 
+  * [schemavalidator](https://github.com/miloyip/rapidjson/blob/master/example/schemavalidator/schemavalidator.cpp) : A command line tool to validate a JSON with a JSON schema.
+
 * Advanced
- * [prettyauto](https://github.com/miloyip/rapidjson/blob/master/example/prettyauto/prettyauto.cpp): A modified version of [pretty](https://github.com/miloyip/rapidjson/blob/master/example/pretty/pretty.cpp) to automatically handle JSON with any UTF encodings.
- * [parsebyparts](https://github.com/miloyip/rapidjson/blob/master/example/parsebyparts/parsebyparts.cpp): Implements an `AsyncDocumentParser` which can parse JSON in parts, using C++11 thread.
- * [filterkey](https://github.com/miloyip/rapidjson/blob/master/example/filterkey/filterkey.cpp): A command line tool to remove all values with user-specified key.
- * [filterkeydom](https://github.com/miloyip/rapidjson/blob/master/example/filterkeydom/filterkeydom.cpp): Same tool as above, but it demonstrates how to use a generator to populate a `Document`.
+  * [prettyauto](https://github.com/miloyip/rapidjson/blob/master/example/prettyauto/prettyauto.cpp): A modified version of [pretty](https://github.com/miloyip/rapidjson/blob/master/example/pretty/pretty.cpp) to automatically handle JSON with any UTF encodings.
+  * [parsebyparts](https://github.com/miloyip/rapidjson/blob/master/example/parsebyparts/parsebyparts.cpp): Implements an `AsyncDocumentParser` which can parse JSON in parts, using C++11 thread.
+  * [filterkey](https://github.com/miloyip/rapidjson/blob/master/example/filterkey/filterkey.cpp): A command line tool to remove all values with user-specified key.
+  * [filterkeydom](https://github.com/miloyip/rapidjson/blob/master/example/filterkeydom/filterkeydom.cpp): Same tool as above, but it demonstrates how to use a generator to populate a `Document`.
diff --git a/readme.zh-cn.md b/readme.zh-cn.md
index b62b2e132..81b84bb48 100644
--- a/readme.zh-cn.md
+++ b/readme.zh-cn.md
@@ -1,6 +1,6 @@
-![](doc/logo/rapidjson.png)
+![RapidJSON logo](doc/logo/rapidjson.png)
 
-![](https://img.shields.io/badge/release-v1.1.0-blue.png)
+![Release version](https://img.shields.io/badge/release-v1.1.0-blue.svg)
 
 ## 高效的 C++ JSON 解析／生成器，提供 SAX 及 DOM 风格 API
 
@@ -20,12 +20,12 @@ Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights
 | :---------------: | :-----------------: | :-------------------: |
 | ![lin-badge]      | ![win-badge]        | ![cov-badge]          |
 
-[lin-badge]: https://travis-ci.org/miloyip/rapidjson.png?branch=master "Travis build status"
+[lin-badge]: https://travis-ci.org/miloyip/rapidjson.svg?branch=master "Travis build status"
 [lin-link]:  https://travis-ci.org/miloyip/rapidjson "Travis build status"
-[win-badge]: https://ci.appveyor.com/api/projects/status/u658dcuwxo14a8m9/branch/master "AppVeyor build status"
+[win-badge]: https://ci.appveyor.com/api/projects/status/github/miloyip/rapidjson?branch=master&svg=true "AppVeyor build status"
 [win-link]:  https://ci.appveyor.com/project/miloyip/rapidjson/branch/master "AppVeyor build status"
-[cov-badge]: https://coveralls.io/repos/miloyip/rapidjson/badge.png?branch=master
-[cov-link]:  https://coveralls.io/r/miloyip/rapidjson?branch=master
+[cov-badge]: https://coveralls.io/repos/miloyip/rapidjson/badge.svg?branch=master "Coveralls coverage"
+[cov-link]:  https://coveralls.io/r/miloyip/rapidjson?branch=master "Coveralls coverage"
 
 ## 简介
 
@@ -45,8 +45,8 @@ RapidJSON 是一个 C++ 的 JSON 解析器及生成器。它的灵感来自 [Rap
 
 JSON（JavaScript Object Notation）是一个轻量的数据交换格式。RapidJSON 应该完全遵从 RFC7159/ECMA-404，并支持可选的放宽语法。 关于 JSON 的更多信息可参考：
 * [Introducing JSON](http://json.org/)
-* [RFC7159: The JavaScript Object Notation (JSON) Data Interchange Format](http://www.ietf.org/rfc/rfc7159.txt)
-* [Standard ECMA-404: The JSON Data Interchange Format](http://www.ecma-international.org/publications/standards/Ecma-404.htm)
+* [RFC7159: The JavaScript Object Notation (JSON) Data Interchange Format](https://tools.ietf.org/html/rfc7159)
+* [Standard ECMA-404: The JSON Data Interchange Format](https://www.ecma-international.org/publications/standards/Ecma-404.htm)
 
 ## v1.1 中的亮点 (2016-8-25)
 
@@ -73,9 +73,9 @@ RapidJSON 是跨平台的。以下是一些曾测试的平台／编译器组合
 RapidJSON 是只有头文件的 C++ 库。只需把 `include/rapidjson` 目录复制至系统或项目的 include 目录中。
 
 RapidJSON 依赖于以下软件：
-* [CMake](http://www.cmake.org) 作为通用生成工具
-* (optional)[Doxygen](http://www.doxygen.org) 用于生成文档
-* (optional)[googletest](https://code.google.com/p/googletest/) 用于单元及性能测试
+* [CMake](https://cmake.org/) 作为通用生成工具
+* (optional) [Doxygen](http://www.doxygen.org) 用于生成文档
+* (optional) [googletest](https://github.com/google/googletest) 用于单元及性能测试
 
 生成测试及例子的步骤：
 
@@ -131,22 +131,22 @@ int main() {
 还有许多 [例子](https://github.com/miloyip/rapidjson/tree/master/example) 可供参考：
 
 * DOM API
- * [tutorial](https://github.com/miloyip/rapidjson/blob/master/example/tutorial/tutorial.cpp): DOM API 的基本使用方法。
+  * [tutorial](https://github.com/miloyip/rapidjson/blob/master/example/tutorial/tutorial.cpp): DOM API 的基本使用方法。
 
 * SAX API
- * [simplereader](https://github.com/miloyip/rapidjson/blob/master/example/simplereader/simplereader.cpp): 使用 `Reader` 解析 JSON 时，打印所有 SAX 事件。
- * [condense](https://github.com/miloyip/rapidjson/blob/master/example/condense/condense.cpp): 移除 JSON 中所有空白符的命令行工具。
- * [pretty](https://github.com/miloyip/rapidjson/blob/master/example/pretty/pretty.cpp): 为 JSON 加入缩进与换行的命令行工具，当中使用了 `PrettyWriter`。
- * [capitalize](https://github.com/miloyip/rapidjson/blob/master/example/capitalize/capitalize.cpp): 把 JSON 中所有字符串改为大写的命令行工具。
- * [messagereader](https://github.com/miloyip/rapidjson/blob/master/example/messagereader/messagereader.cpp): 使用 SAX API 去解析一个 JSON 报文。
- * [serialize](https://github.com/miloyip/rapidjson/blob/master/example/serialize/serialize.cpp): 使用 SAX API 去序列化 C++ 对象，生成 JSON。
- * [jsonx](https://github.com/miloyip/rapidjson/blob/master/example/jsonx/jsonx.cpp): 实现了一个 `JsonxWriter`，它能把 SAX 事件写成 [JSONx](https://www-01.ibm.com/support/knowledgecenter/SS9H2Y_7.1.0/com.ibm.dp.doc/json_jsonx.html)（一种 XML）格式。这个例子是把 JSON 输入转换成 JSONx 格式的命令行工具。
+  * [simplereader](https://github.com/miloyip/rapidjson/blob/master/example/simplereader/simplereader.cpp): 使用 `Reader` 解析 JSON 时，打印所有 SAX 事件。
+  * [condense](https://github.com/miloyip/rapidjson/blob/master/example/condense/condense.cpp): 移除 JSON 中所有空白符的命令行工具。
+  * [pretty](https://github.com/miloyip/rapidjson/blob/master/example/pretty/pretty.cpp): 为 JSON 加入缩进与换行的命令行工具，当中使用了 `PrettyWriter`。
+  * [capitalize](https://github.com/miloyip/rapidjson/blob/master/example/capitalize/capitalize.cpp): 把 JSON 中所有字符串改为大写的命令行工具。
+  * [messagereader](https://github.com/miloyip/rapidjson/blob/master/example/messagereader/messagereader.cpp): 使用 SAX API 去解析一个 JSON 报文。
+  * [serialize](https://github.com/miloyip/rapidjson/blob/master/example/serialize/serialize.cpp): 使用 SAX API 去序列化 C++ 对象，生成 JSON。
+  * [jsonx](https://github.com/miloyip/rapidjson/blob/master/example/jsonx/jsonx.cpp): 实现了一个 `JsonxWriter`，它能把 SAX 事件写成 [JSONx](https://www-01.ibm.com/support/knowledgecenter/SS9H2Y_7.1.0/com.ibm.dp.doc/json_jsonx.html)（一种 XML）格式。这个例子是把 JSON 输入转换成 JSONx 格式的命令行工具。
 
 * Schema API
- * [schemavalidator](https://github.com/miloyip/rapidjson/blob/master/example/schemavalidator/schemavalidator.cpp): 使用 JSON Schema 去校验 JSON 的命令行工具。
- 
+  * [schemavalidator](https://github.com/miloyip/rapidjson/blob/master/example/schemavalidator/schemavalidator.cpp): 使用 JSON Schema 去校验 JSON 的命令行工具。
+
 * 进阶
- * [prettyauto](https://github.com/miloyip/rapidjson/blob/master/example/prettyauto/prettyauto.cpp): [pretty](https://github.com/miloyip/rapidjson/blob/master/example/pretty/pretty.cpp) 的修改版本，可自动处理任何 UTF 编码的 JSON。
- * [parsebyparts](https://github.com/miloyip/rapidjson/blob/master/example/parsebyparts/parsebyparts.cpp): 这例子中的 `AsyncDocumentParser` 类使用 C++ 线程来逐段解析 JSON。
- * [filterkey](https://github.com/miloyip/rapidjson/blob/master/example/filterkey/filterkey.cpp): 移取使用者指定的键值的命令行工具。
- * [filterkeydom](https://github.com/miloyip/rapidjson/blob/master/example/filterkey/filterkey.cpp): 如上的工具，但展示如何使用生成器（generator）去填充一个 `Document`。
\ No newline at end of file
+  * [prettyauto](https://github.com/miloyip/rapidjson/blob/master/example/prettyauto/prettyauto.cpp): [pretty](https://github.com/miloyip/rapidjson/blob/master/example/pretty/pretty.cpp) 的修改版本，可自动处理任何 UTF 编码的 JSON。
+  * [parsebyparts](https://github.com/miloyip/rapidjson/blob/master/example/parsebyparts/parsebyparts.cpp): 这例子中的 `AsyncDocumentParser` 类使用 C++ 线程来逐段解析 JSON。
+  * [filterkey](https://github.com/miloyip/rapidjson/blob/master/example/filterkey/filterkey.cpp): 移取使用者指定的键值的命令行工具。
+  * [filterkeydom](https://github.com/miloyip/rapidjson/blob/master/example/filterkeydom/filterkeydom.cpp): 如上的工具，但展示如何使用生成器（generator）去填充一个 `Document`。
\ No newline at end of file
diff --git a/test/perftest/CMakeLists.txt b/test/perftest/CMakeLists.txt
index c33aae469..035e544d9 100644
--- a/test/perftest/CMakeLists.txt
+++ b/test/perftest/CMakeLists.txt
@@ -19,6 +19,8 @@ if(CCACHE_FOUND)
     endif()
 endif(CCACHE_FOUND)
 
+set_property(DIRECTORY APPEND PROPERTY COMPILE_OPTIONS ${EXTRA_CXX_FLAGS}) # APPEND keeps options inherited from the parent directory
+
 IF(NOT (CMAKE_BUILD_TYPE STREQUAL "Debug"))
 add_test(NAME perftest
     COMMAND ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/perftest
diff --git a/test/perftest/perftest.h b/test/perftest/perftest.h
index b098e4147..953f95de8 100644
--- a/test/perftest/perftest.h
+++ b/test/perftest/perftest.h
@@ -24,10 +24,13 @@
 
 // __SSE2__ and __SSE4_2__ are recognized by gcc, clang, and the Intel compiler.
 // We use -march=native with gmake to enable -msse2 and -msse4.2, if supported.
+// Likewise, __ARM_NEON is used to detect Neon.
 #if defined(__SSE4_2__)
 #  define RAPIDJSON_SSE42
 #elif defined(__SSE2__)
 #  define RAPIDJSON_SSE2
+#elif defined(__ARM_NEON)
+#  define RAPIDJSON_NEON
 #endif
 
 #define RAPIDJSON_HAS_STDSTRING 1
diff --git a/test/perftest/rapidjsontest.cpp b/test/perftest/rapidjsontest.cpp
index 675db3182..a11a557d1 100644
--- a/test/perftest/rapidjsontest.cpp
+++ b/test/perftest/rapidjsontest.cpp
@@ -28,6 +28,8 @@
 #define SIMD_SUFFIX(name) name##_SSE2
 #elif defined(RAPIDJSON_SSE42)
 #define SIMD_SUFFIX(name) name##_SSE42
+#elif defined(RAPIDJSON_NEON)
+#define SIMD_SUFFIX(name) name##_NEON
 #else
 #define SIMD_SUFFIX(name) name
 #endif
@@ -152,6 +154,35 @@ TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseIterativeInsitu_DummyHandler)) {
     }
 }
 
+TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseIterativePull_DummyHandler)) { // perf: drive the reader step by step over json_
+    for (size_t i = 0; i < kTrialCount; i++) {
+        StringStream s(json_);
+        BaseReaderHandler<> h;
+        Reader reader;
+        reader.IterativeParseInit();
+        while (!reader.IterativeParseComplete()) { // one IterativeParseNext() call per loop iteration
+            if (!reader.IterativeParseNext<kParseDefaultFlags>(s, h))
+                break; // stop as soon as a step reports failure
+        }
+        EXPECT_FALSE(reader.HasParseError());
+    }
+}
+
+TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseIterativePullInsitu_DummyHandler)) { // perf: step-by-step parse with kParseInsituFlag
+    for (size_t i = 0; i < kTrialCount; i++) {
+        memcpy(temp_, json_, length_ + 1); // fresh copy of json_ into temp_ for every trial
+        InsituStringStream s(temp_);
+        BaseReaderHandler<> h;
+        Reader reader;
+        reader.IterativeParseInit();
+        while (!reader.IterativeParseComplete()) { // one IterativeParseNext() call per loop iteration
+            if (!reader.IterativeParseNext<kParseDefaultFlags|kParseInsituFlag>(s, h))
+                break; // stop as soon as a step reports failure
+        }
+        EXPECT_FALSE(reader.HasParseError());
+    }
+}
+
 TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_ValidateEncoding)) {
     for (size_t i = 0; i < kTrialCount; i++) {
         StringStream s(json_);
diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt
index b3204d6c8..fdf0ad067 100644
--- a/test/unittest/CMakeLists.txt
+++ b/test/unittest/CMakeLists.txt
@@ -36,10 +36,9 @@ if(CCACHE_FOUND)
 		endif()
 endif(CCACHE_FOUND)
 
-if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall -Wextra -Weffc++ -Wswitch-default -Wfloat-equal")
-elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall -Wextra -Weffc++ -Wswitch-default -Wfloat-equal -Wimplicit-fallthrough -Weverything")
+set_property(DIRECTORY APPEND PROPERTY COMPILE_OPTIONS ${EXTRA_CXX_FLAGS}) # APPEND keeps options inherited from the parent directory
+
+if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
     # If the user is running a newer version of Clang that includes the
     # -Wdouble-promotion, we will ignore that warning.
     if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 3.7)
@@ -80,7 +79,7 @@ add_test(NAME unittest
 if(NOT MSVC)
     # Not running SIMD.* unit test cases for Valgrind
     add_test(NAME valgrind_unittest
-        COMMAND valgrind --leak-check=full --error-exitcode=1 ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/unittest --gtest_filter=-SIMD.*
+        COMMAND valgrind --suppressions=${CMAKE_SOURCE_DIR}/test/valgrind.supp --leak-check=full --error-exitcode=1 ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/unittest --gtest_filter=-SIMD.*
         WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/bin)
 
     if(CMAKE_BUILD_TYPE STREQUAL "Debug")
diff --git a/test/unittest/documenttest.cpp b/test/unittest/documenttest.cpp
index ecd4b79bc..0ca58019b 100644
--- a/test/unittest/documenttest.cpp
+++ b/test/unittest/documenttest.cpp
@@ -128,8 +128,14 @@ TEST(Document, UnchangedOnParseError) {
     Document doc;
     doc.SetArray().PushBack(0, doc.GetAllocator());
 
+    ParseResult noError;
+    EXPECT_TRUE(noError);
+
     ParseResult err = doc.Parse("{]");
     EXPECT_TRUE(doc.HasParseError());
+    EXPECT_NE(err, noError);
+    EXPECT_NE(err.Code(), noError);
+    EXPECT_NE(noError, doc.GetParseError());
     EXPECT_EQ(err.Code(), doc.GetParseError());
     EXPECT_EQ(err.Offset(), doc.GetErrorOffset());
     EXPECT_TRUE(doc.IsArray());
@@ -138,6 +144,9 @@ TEST(Document, UnchangedOnParseError) {
     err = doc.Parse("{}");
     EXPECT_FALSE(doc.HasParseError());
     EXPECT_FALSE(err.IsError());
+    EXPECT_TRUE(err);
+    EXPECT_EQ(err, noError);
+    EXPECT_EQ(err.Code(), noError);
     EXPECT_EQ(err.Code(), doc.GetParseError());
     EXPECT_EQ(err.Offset(), doc.GetErrorOffset());
     EXPECT_TRUE(doc.IsObject());
@@ -488,15 +497,19 @@ TYPED_TEST(DocumentMove, MoveConstructorParseError) {
     a.Parse("{ 4 = 4]");
     ParseResult error(a.GetParseError(), a.GetErrorOffset());
     EXPECT_TRUE(a.HasParseError());
+    EXPECT_NE(error, noError);
+    EXPECT_NE(error.Code(), noError);
     EXPECT_NE(error.Code(), noError.Code());
     EXPECT_NE(error.Offset(), noError.Offset());
 
     D b(std::move(a));
     EXPECT_FALSE(a.HasParseError());
     EXPECT_TRUE(b.HasParseError());
+    EXPECT_EQ(a.GetParseError(), noError);
     EXPECT_EQ(a.GetParseError(), noError.Code());
-    EXPECT_EQ(b.GetParseError(), error.Code());
     EXPECT_EQ(a.GetErrorOffset(), noError.Offset());
+    EXPECT_EQ(b.GetParseError(), error);
+    EXPECT_EQ(b.GetParseError(), error.Code());
     EXPECT_EQ(b.GetErrorOffset(), error.Offset());
 
     D c(std::move(b));
diff --git a/test/unittest/encodingstest.cpp b/test/unittest/encodingstest.cpp
index 67b0391ed..82cf77761 100644
--- a/test/unittest/encodingstest.cpp
+++ b/test/unittest/encodingstest.cpp
@@ -267,7 +267,7 @@ static unsigned inline decode(unsigned* state, unsigned* codep, unsigned byte) {
 
     *codep = (*state != UTF8_ACCEPT) ?
         (byte & 0x3fu) | (*codep << 6) :
-    (0xff >> type) & (byte);
+    (0xffu >> type) & (byte);
 
     *state = utf8d[256 + *state + type];
     return *state;
diff --git a/test/unittest/fwdtest.cpp b/test/unittest/fwdtest.cpp
index 4f3268461..1936d9779 100644
--- a/test/unittest/fwdtest.cpp
+++ b/test/unittest/fwdtest.cpp
@@ -100,6 +100,9 @@ struct Foo {
 #include "rapidjson/prettywriter.h"
 #include "rapidjson/schema.h"   // -> pointer.h
 
+typedef Transcoder<UTF8<>, UTF8<> > TranscoderUtf8ToUtf8;
+typedef BaseReaderHandler<UTF8<>, void> BaseReaderHandlerUtf8Void;
+
 Foo::Foo() : 
     // encodings.h
     utf8(RAPIDJSON_NEW(UTF8<>)),
@@ -111,40 +114,40 @@ Foo::Foo() :
     utf32le(RAPIDJSON_NEW(UTF32LE<>)),
     ascii(RAPIDJSON_NEW(ASCII<>)),
     autoutf(RAPIDJSON_NEW(AutoUTF<unsigned>)),
-    transcoder(RAPIDJSON_NEW((Transcoder<UTF8<>, UTF8<> >))),
+    transcoder(RAPIDJSON_NEW(TranscoderUtf8ToUtf8)),
 
     // allocators.h
     crtallocator(RAPIDJSON_NEW(CrtAllocator)),
     memorypoolallocator(RAPIDJSON_NEW(MemoryPoolAllocator<>)),
 
     // stream.h
-    stringstream(RAPIDJSON_NEW(StringStream(0))),
-    insitustringstream(RAPIDJSON_NEW(InsituStringStream(0))),
+    stringstream(RAPIDJSON_NEW(StringStream)(NULL)),
+    insitustringstream(RAPIDJSON_NEW(InsituStringStream)(NULL)),
 
     // stringbuffer.h
     stringbuffer(RAPIDJSON_NEW(StringBuffer)),
 
     // // filereadstream.h
-    // filereadstream(RAPIDJSON_NEW(FileReadStream(stdout, buffer, sizeof(buffer)))),
+    // filereadstream(RAPIDJSON_NEW(FileReadStream)(stdout, buffer, sizeof(buffer))),
 
     // // filewritestream.h
-    // filewritestream(RAPIDJSON_NEW(FileWriteStream(stdout, buffer, sizeof(buffer)))),
+    // filewritestream(RAPIDJSON_NEW(FileWriteStream)(stdout, buffer, sizeof(buffer))),
 
     // memorybuffer.h
     memorybuffer(RAPIDJSON_NEW(MemoryBuffer)),
 
     // memorystream.h
-    memorystream(RAPIDJSON_NEW(MemoryStream(0, 0))),
+    memorystream(RAPIDJSON_NEW(MemoryStream)(NULL, 0)),
 
     // reader.h
-    basereaderhandler(RAPIDJSON_NEW((BaseReaderHandler<UTF8<>, void>))),
+    basereaderhandler(RAPIDJSON_NEW(BaseReaderHandlerUtf8Void)),
     reader(RAPIDJSON_NEW(Reader)),
 
     // writer.h
-    writer(RAPIDJSON_NEW((Writer<StringBuffer>))),
+    writer(RAPIDJSON_NEW(Writer<StringBuffer>)),
 
     // prettywriter.h
-    prettywriter(RAPIDJSON_NEW((PrettyWriter<StringBuffer>))),
+    prettywriter(RAPIDJSON_NEW(PrettyWriter<StringBuffer>)),
 
     // document.h
     value(RAPIDJSON_NEW(Value)),
@@ -154,8 +157,8 @@ Foo::Foo() :
     pointer(RAPIDJSON_NEW(Pointer)),
 
     // schema.h
-    schemadocument(RAPIDJSON_NEW(SchemaDocument(*document))),
-    schemavalidator(RAPIDJSON_NEW(SchemaValidator(*schemadocument)))
+    schemadocument(RAPIDJSON_NEW(SchemaDocument)(*document)),
+    schemavalidator(RAPIDJSON_NEW(SchemaValidator)(*schemadocument))
 {
 
 }
diff --git a/test/unittest/itoatest.cpp b/test/unittest/itoatest.cpp
index b752a6a26..2f66bedc3 100644
--- a/test/unittest/itoatest.cpp
+++ b/test/unittest/itoatest.cpp
@@ -74,7 +74,7 @@ static void Verify(void(*f)(T, char*), char* (*g)(T, char*)) {
     VerifyValue<T>(std::numeric_limits<T>::max(), f, g);
 
     // 2^n - 1, 2^n, 10^n - 1, 10^n until overflow
-    for (uint32_t power = 2; power <= 10; power += 8) {
+    for (int power = 2; power <= 10; power += 8) {
         T i = 1, last;
         do {
             VerifyValue<T>(i - 1, f, g);
@@ -86,7 +86,7 @@ static void Verify(void(*f)(T, char*), char* (*g)(T, char*)) {
             last = i;
             if (i > static_cast<T>(std::numeric_limits<T>::max() / static_cast<T>(power)))
                 break;
-            i *= power;
+            i *= static_cast<T>(power);
         } while (last < i);
     }
 }
diff --git a/test/unittest/jsoncheckertest.cpp b/test/unittest/jsoncheckertest.cpp
index bea788d26..47c2b567b 100644
--- a/test/unittest/jsoncheckertest.cpp
+++ b/test/unittest/jsoncheckertest.cpp
@@ -48,6 +48,24 @@ static char* ReadFile(const char* filename, size_t& length) {
     return json;
 }
 
+struct NoOpHandler { // handler whose every callback ignores its arguments and returns true
+    bool Null() { return true; }
+    bool Bool(bool) { return true; }
+    bool Int(int) { return true; }
+    bool Uint(unsigned) { return true; }
+    bool Int64(int64_t) { return true; }
+    bool Uint64(uint64_t) { return true; }
+    bool Double(double) { return true; }
+    bool RawNumber(const char*, SizeType, bool) { return true; }
+    bool String(const char*, SizeType, bool) { return true; }
+    bool StartObject() { return true; }
+    bool Key(const char*, SizeType, bool) { return true; }
+    bool EndObject(SizeType) { return true; }
+    bool StartArray() { return true; }
+    bool EndArray(SizeType) { return true; }
+};
+
+
 TEST(JsonChecker, Reader) {
     char filename[256];
 
@@ -67,13 +85,26 @@ TEST(JsonChecker, Reader) {
             continue;
         }
 
+        // Test stack-based parsing.
         GenericDocument<UTF8<>, CrtAllocator> document; // Use Crt allocator to check exception-safety (no memory leak)
         document.Parse(json);
-        EXPECT_TRUE(document.HasParseError());
+        EXPECT_TRUE(document.HasParseError()) << filename;
 
+        // Test iterative parsing.
         document.Parse<kParseIterativeFlag>(json);
-        EXPECT_TRUE(document.HasParseError());
-
+        EXPECT_TRUE(document.HasParseError()) << filename;
+
+        // Test iterative pull-parsing.
+        Reader reader;
+        StringStream ss(json);
+        NoOpHandler h;
+        reader.IterativeParseInit();
+        while (!reader.IterativeParseComplete()) {
+            if (!reader.IterativeParseNext<kParseDefaultFlags>(ss, h))
+                break;
+        }
+        EXPECT_TRUE(reader.HasParseError()) << filename;
+        
         free(json);
     }
 
@@ -87,12 +118,25 @@ TEST(JsonChecker, Reader) {
             continue;
         }
 
+        // Test stack-based parsing.
         GenericDocument<UTF8<>, CrtAllocator> document; // Use Crt allocator to check exception-safety (no memory leak)
         document.Parse(json);
-        EXPECT_FALSE(document.HasParseError());
+        EXPECT_FALSE(document.HasParseError()) << filename;
 
+        // Test iterative parsing.
         document.Parse<kParseIterativeFlag>(json);
-        EXPECT_FALSE(document.HasParseError());
+        EXPECT_FALSE(document.HasParseError()) << filename;
+        
+        // Test iterative pull-parsing.
+        Reader reader;
+        StringStream ss(json);
+        NoOpHandler h;
+        reader.IterativeParseInit();
+        while (!reader.IterativeParseComplete()) {
+            if (!reader.IterativeParseNext<kParseDefaultFlags>(ss, h))
+                break;
+        }
+        EXPECT_FALSE(reader.HasParseError()) << filename;
 
         free(json);
     }
diff --git a/test/unittest/ostreamwrappertest.cpp b/test/unittest/ostreamwrappertest.cpp
index b1d1cd827..50f8da63e 100644
--- a/test/unittest/ostreamwrappertest.cpp
+++ b/test/unittest/ostreamwrappertest.cpp
@@ -69,14 +69,15 @@ static void TestFileStream() {
 
     const char* s = "Hello World!\n";
     {
-        ofstream ofs(filename, ios::out | ios::binary);
-        BasicOStreamWrapper<ofstream> osw(ofs);
+        FileStreamType ofs(filename, ios::out | ios::binary);
+        BasicOStreamWrapper<FileStreamType> osw(ofs);
         for (const char* p = s; *p; p++)
             osw.Put(*p);
         osw.Flush();
     }
 
     fp = fopen(filename, "r");
+    ASSERT_TRUE(fp != NULL); // abort before fgetc()/fclose() if the file could not be reopened
     for (const char* p = s; *p; p++)
         EXPECT_EQ(*p, static_cast<char>(fgetc(fp)));
     fclose(fp);
diff --git a/test/unittest/pointertest.cpp b/test/unittest/pointertest.cpp
index dbddbedee..eed6fba90 100644
--- a/test/unittest/pointertest.cpp
+++ b/test/unittest/pointertest.cpp
@@ -441,8 +441,8 @@ TEST(Pointer, Stringify) {
 }
 
 // Construct a Pointer with static tokens, no dynamic allocation involved.
-#define NAME(s) { s, sizeof(s) / sizeof(s[0]) - 1, kPointerInvalidIndex }
-#define INDEX(i) { #i, sizeof(#i) - 1, i }
+#define NAME(s) { s, static_cast<SizeType>(sizeof(s) / sizeof(s[0]) - 1), kPointerInvalidIndex }
+#define INDEX(i) { #i, static_cast<SizeType>(sizeof(#i) - 1), i }
 
 static const Pointer::Token kTokens[] = { NAME("foo"), INDEX(0) }; // equivalent to "/foo/0"
 
diff --git a/test/unittest/prettywritertest.cpp b/test/unittest/prettywritertest.cpp
index 13d1a8d93..43617a2f5 100644
--- a/test/unittest/prettywritertest.cpp
+++ b/test/unittest/prettywritertest.cpp
@@ -167,6 +167,7 @@ TEST(PrettyWriter, OStreamWrapper) {
 TEST(PrettyWriter, FileWriteStream) {
     char filename[L_tmpnam];
     FILE* fp = TempFile(filename);
+    ASSERT_TRUE(fp!=NULL);
     char buffer[16];
     FileWriteStream os(fp, buffer, sizeof(buffer));
     PrettyWriter<FileWriteStream> writer(os);
@@ -207,6 +208,114 @@ TEST(PrettyWriter, RawValue) {
         buffer.GetString());
 }
 
+TEST(PrettyWriter, InvalidEventSequence) {
+    // {]
+    {
+        StringBuffer buffer;
+        PrettyWriter<StringBuffer> writer(buffer);
+        writer.StartObject();
+        EXPECT_THROW(writer.EndArray(), AssertException);
+        EXPECT_FALSE(writer.IsComplete());
+    }
+    
+    // [}
+    {
+        StringBuffer buffer;
+        PrettyWriter<StringBuffer> writer(buffer);
+        writer.StartArray();
+        EXPECT_THROW(writer.EndObject(), AssertException);
+        EXPECT_FALSE(writer.IsComplete());
+    }
+    
+    // { 1:
+    {
+        StringBuffer buffer;
+        PrettyWriter<StringBuffer> writer(buffer);
+        writer.StartObject();
+        EXPECT_THROW(writer.Int(1), AssertException);
+        EXPECT_FALSE(writer.IsComplete());
+    }
+    
+    // { 'a' }
+    {
+        StringBuffer buffer;
+        PrettyWriter<StringBuffer> writer(buffer);
+        writer.StartObject();
+        writer.Key("a");
+        EXPECT_THROW(writer.EndObject(), AssertException);
+        EXPECT_FALSE(writer.IsComplete());
+    }
+    
+    // { 'a':'b','c' }
+    {
+        StringBuffer buffer;
+        PrettyWriter<StringBuffer> writer(buffer);
+        writer.StartObject();
+        writer.Key("a");
+        writer.String("b");
+        writer.Key("c");
+        EXPECT_THROW(writer.EndObject(), AssertException);
+        EXPECT_FALSE(writer.IsComplete());
+    }
+}
+
+TEST(PrettyWriter, NaN) {
+    double nan = std::numeric_limits<double>::quiet_NaN();
+
+    EXPECT_TRUE(internal::Double(nan).IsNan());
+    StringBuffer buffer;
+    {
+        PrettyWriter<StringBuffer> writer(buffer);
+        EXPECT_FALSE(writer.Double(nan));
+    }
+    {
+        PrettyWriter<StringBuffer, UTF8<>, UTF8<>, CrtAllocator, kWriteNanAndInfFlag> writer(buffer);
+        EXPECT_TRUE(writer.Double(nan));
+        EXPECT_STREQ("NaN", buffer.GetString());
+    }
+    GenericStringBuffer<UTF16<> > buffer2;
+    PrettyWriter<GenericStringBuffer<UTF16<> > > writer2(buffer2);
+    EXPECT_FALSE(writer2.Double(nan));
+}
+
+TEST(PrettyWriter, Inf) {
+    double inf = std::numeric_limits<double>::infinity();
+
+    EXPECT_TRUE(internal::Double(inf).IsInf());
+    StringBuffer buffer;
+    {
+        PrettyWriter<StringBuffer> writer(buffer);
+        EXPECT_FALSE(writer.Double(inf));
+    }
+    {
+        PrettyWriter<StringBuffer> writer(buffer);
+        EXPECT_FALSE(writer.Double(-inf));
+    }
+    {
+        PrettyWriter<StringBuffer, UTF8<>, UTF8<>, CrtAllocator, kWriteNanAndInfFlag> writer(buffer);
+        EXPECT_TRUE(writer.Double(inf));
+    }
+    {
+        PrettyWriter<StringBuffer, UTF8<>, UTF8<>, CrtAllocator, kWriteNanAndInfFlag> writer(buffer);
+        EXPECT_TRUE(writer.Double(-inf));
+    }
+    EXPECT_STREQ("Infinity-Infinity", buffer.GetString());
+}
+
+TEST(PrettyWriter, Issue_889) {
+    char buf[100] = "Hello";
+    // The string is passed as a fixed-size char array, not a string literal.
+    StringBuffer buffer;
+    PrettyWriter<StringBuffer> writer(buffer);
+    writer.StartArray();
+    writer.String(buf);
+    writer.EndArray();
+    
+    EXPECT_STREQ("[\n    \"Hello\"\n]", buffer.GetString());
+    EXPECT_TRUE(writer.IsComplete());
+}
+
+
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
 
 static PrettyWriter<StringBuffer> WriterGen(StringBuffer &target) {
diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp
index 64a1f9c3c..5078f5246 100644
--- a/test/unittest/readertest.cpp
+++ b/test/unittest/readertest.cpp
@@ -245,13 +245,13 @@ static void TestParseDouble() {
     TEST_DOUBLE(fullPrecision, "0.017976931348623157e+310", 1.7976931348623157e+308); // Max double in another form
 
     // Since
-    // abs((2^-1022 - 2^-1074) - 2.2250738585072012e-308) = 3.109754131239141401123495768877590405345064751974375599... �� 10^-324
-    // abs((2^-1022) - 2.2250738585072012e-308) = 1.830902327173324040642192159804623318305533274168872044... �� 10 ^ -324
+    // abs((2^-1022 - 2^-1074) - 2.2250738585072012e-308) = 3.109754131239141401123495768877590405345064751974375599... x 10^-324
+    // abs((2^-1022) - 2.2250738585072012e-308) = 1.830902327173324040642192159804623318305533274168872044... x 10 ^ -324
     // So 2.2250738585072012e-308 should round to 2^-1022 = 2.2250738585072014e-308
     TEST_DOUBLE(fullPrecision, "2.2250738585072012e-308", 2.2250738585072014e-308); // http://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/
 
     // More closer to normal/subnormal boundary
-    // boundary = 2^-1022 - 2^-1075 = 2.225073858507201136057409796709131975934819546351645648... �� 10^-308
+    // boundary = 2^-1022 - 2^-1075 = 2.225073858507201136057409796709131975934819546351645648... x 10^-308
     TEST_DOUBLE(fullPrecision, "2.22507385850720113605740979670913197593481954635164564e-308", 2.2250738585072009e-308);
     TEST_DOUBLE(fullPrecision, "2.22507385850720113605740979670913197593481954635164565e-308", 2.2250738585072014e-308);
 
@@ -725,6 +725,8 @@ TEST(Reader, ParseString_Error) {
 
     // Malform ASCII sequence
     TEST_STRINGENCODING_ERROR(ASCII<>, UTF8<>, char, ARRAY('[', '\"', char(0x80u), '\"', ']', '\0'));
+    TEST_STRINGENCODING_ERROR(ASCII<>, UTF8<>, char, ARRAY('[', '\"', char(0x01u), '\"', ']', '\0'));
+    TEST_STRINGENCODING_ERROR(ASCII<>, UTF8<>, char, ARRAY('[', '\"', char(0x1Cu), '\"', ']', '\0'));
 
 #undef ARRAY
 #undef TEST_STRINGARRAY_ERROR
@@ -1157,22 +1159,22 @@ template<typename Encoding = UTF8<> >
 struct IterativeParsingReaderHandler {
     typedef typename Encoding::Ch Ch;
 
-    const static int LOG_NULL = -1;
-    const static int LOG_BOOL = -2;
-    const static int LOG_INT = -3;
-    const static int LOG_UINT = -4;
-    const static int LOG_INT64 = -5;
-    const static int LOG_UINT64 = -6;
-    const static int LOG_DOUBLE = -7;
-    const static int LOG_STRING = -8;
-    const static int LOG_STARTOBJECT = -9;
-    const static int LOG_KEY = -10;
-    const static int LOG_ENDOBJECT = -11;
-    const static int LOG_STARTARRAY = -12;
-    const static int LOG_ENDARRAY = -13;
+    const static uint32_t LOG_NULL        = 0x10000000;
+    const static uint32_t LOG_BOOL        = 0x20000000;
+    const static uint32_t LOG_INT         = 0x30000000;
+    const static uint32_t LOG_UINT        = 0x40000000;
+    const static uint32_t LOG_INT64       = 0x50000000;
+    const static uint32_t LOG_UINT64      = 0x60000000;
+    const static uint32_t LOG_DOUBLE      = 0x70000000;
+    const static uint32_t LOG_STRING      = 0x80000000;
+    const static uint32_t LOG_STARTOBJECT = 0x90000000;
+    const static uint32_t LOG_KEY         = 0xA0000000;
+    const static uint32_t LOG_ENDOBJECT   = 0xB0000000;
+    const static uint32_t LOG_STARTARRAY  = 0xC0000000;
+    const static uint32_t LOG_ENDARRAY    = 0xD0000000;
 
     const static size_t LogCapacity = 256;
-    int Logs[LogCapacity];
+    uint32_t Logs[LogCapacity];
     size_t LogCount;
 
     IterativeParsingReaderHandler() : LogCount(0) {
@@ -1202,8 +1204,8 @@ struct IterativeParsingReaderHandler {
 
     bool EndObject(SizeType c) {
         RAPIDJSON_ASSERT(LogCount < LogCapacity);
-        Logs[LogCount++] = LOG_ENDOBJECT;
-        Logs[LogCount++] = static_cast<int>(c);
+        RAPIDJSON_ASSERT((static_cast<uint32_t>(c) & 0xF0000000) == 0);
+        Logs[LogCount++] = LOG_ENDOBJECT | static_cast<uint32_t>(c);
         return true;
     }
 
@@ -1211,8 +1213,8 @@ struct IterativeParsingReaderHandler {
 
     bool EndArray(SizeType c) {
         RAPIDJSON_ASSERT(LogCount < LogCapacity);
-        Logs[LogCount++] = LOG_ENDARRAY;
-        Logs[LogCount++] = static_cast<int>(c);
+        RAPIDJSON_ASSERT((static_cast<uint32_t>(c) & 0xF0000000) == 0);
+        Logs[LogCount++] = LOG_ENDARRAY | static_cast<uint32_t>(c);
         return true;
     }
 };
@@ -1228,7 +1230,7 @@ TEST(Reader, IterativeParsing_General) {
         EXPECT_FALSE(r.IsError());
         EXPECT_FALSE(reader.HasParseError());
 
-        int e[] = {
+        uint32_t e[] = {
             handler.LOG_STARTARRAY,
             handler.LOG_INT,
             handler.LOG_STARTOBJECT,
@@ -1236,14 +1238,14 @@ TEST(Reader, IterativeParsing_General) {
             handler.LOG_STARTARRAY,
             handler.LOG_INT,
             handler.LOG_INT,
-            handler.LOG_ENDARRAY, 2,
-            handler.LOG_ENDOBJECT, 1,
+            handler.LOG_ENDARRAY | 2,
+            handler.LOG_ENDOBJECT | 1,
             handler.LOG_NULL,
             handler.LOG_BOOL,
             handler.LOG_BOOL,
             handler.LOG_STRING,
             handler.LOG_DOUBLE,
-            handler.LOG_ENDARRAY, 7
+            handler.LOG_ENDARRAY | 7
         };
 
         EXPECT_EQ(sizeof(e) / sizeof(int), handler.LogCount);
@@ -1265,20 +1267,20 @@ TEST(Reader, IterativeParsing_Count) {
         EXPECT_FALSE(r.IsError());
         EXPECT_FALSE(reader.HasParseError());
 
-        int e[] = {
+        uint32_t e[] = {
             handler.LOG_STARTARRAY,
             handler.LOG_STARTOBJECT,
-            handler.LOG_ENDOBJECT, 0,
+            handler.LOG_ENDOBJECT | 0,
             handler.LOG_STARTOBJECT,
             handler.LOG_KEY,
             handler.LOG_INT,
-            handler.LOG_ENDOBJECT, 1,
+            handler.LOG_ENDOBJECT | 1,
             handler.LOG_STARTARRAY,
             handler.LOG_INT,
-            handler.LOG_ENDARRAY, 1,
+            handler.LOG_ENDARRAY | 1,
             handler.LOG_STARTARRAY,
-            handler.LOG_ENDARRAY, 0,
-            handler.LOG_ENDARRAY, 4
+            handler.LOG_ENDARRAY | 0,
+            handler.LOG_ENDARRAY | 4
         };
 
         EXPECT_EQ(sizeof(e) / sizeof(int), handler.LogCount);
@@ -1289,6 +1291,51 @@ TEST(Reader, IterativeParsing_Count) {
     }
 }
 
+TEST(Reader, IterativePullParsing_General) {
+    {
+        IterativeParsingReaderHandler<> handler;
+        uint32_t e[] = {
+            handler.LOG_STARTARRAY,
+            handler.LOG_INT,
+            handler.LOG_STARTOBJECT,
+            handler.LOG_KEY,
+            handler.LOG_STARTARRAY,
+            handler.LOG_INT,
+            handler.LOG_INT,
+            handler.LOG_ENDARRAY | 2,
+            handler.LOG_ENDOBJECT | 1,
+            handler.LOG_NULL,
+            handler.LOG_BOOL,
+            handler.LOG_BOOL,
+            handler.LOG_STRING,
+            handler.LOG_DOUBLE,
+            handler.LOG_ENDARRAY | 7
+        };
+        // Input document; e[] above lists the handler events expected for it, in order.
+        StringStream is("[1, {\"k\": [1, 2]}, null, false, true, \"string\", 1.2]");
+        Reader reader;
+        // Pull one event per IterativeParseNext() call and compare it with e[].
+        reader.IterativeParseInit();
+        while (!reader.IterativeParseComplete()) {
+            size_t oldLogCount = handler.LogCount;
+            ASSERT_TRUE(oldLogCount < sizeof(e) / sizeof(e[0])) << "overrun";
+            // Fatal assert above: e[oldLogCount] below must never be read out of bounds.
+            EXPECT_TRUE(reader.IterativeParseNext<kParseDefaultFlags>(is, handler)) << "parse fail";
+            EXPECT_EQ(handler.LogCount, oldLogCount + 1) << "handler should be invoked exactly once each time";
+            EXPECT_EQ(e[oldLogCount], handler.Logs[oldLogCount]) << "wrong event returned";
+        }
+        // Element count is derived from e[0] so it tracks the array's element type.
+        EXPECT_FALSE(reader.HasParseError());
+        EXPECT_EQ(sizeof(e) / sizeof(e[0]), handler.LogCount) << "handler invoked wrong number of times";
+
+        // The handler should not be invoked when the JSON has been fully read, but it should not fail
+        size_t oldLogCount = handler.LogCount;
+        EXPECT_TRUE(reader.IterativeParseNext<kParseDefaultFlags>(is, handler)) << "parse-next past complete is allowed";
+        EXPECT_EQ(handler.LogCount, oldLogCount) << "parse-next past complete should not invoke handler";
+        EXPECT_FALSE(reader.HasParseError()) << "parse-next past complete should not generate parse error";
+    }
+}
+
 // Test iterative parsing on kParseErrorTermination.
 struct HandlerTerminateAtStartObject : public IterativeParsingReaderHandler<> {
     bool StartObject() { return false; }
@@ -1832,6 +1879,10 @@ TEST(Reader, ParseNanAndInfinity) {
     TEST_NAN_INF("Infinity", inf);
     TEST_NAN_INF("-Inf", -inf);
     TEST_NAN_INF("-Infinity", -inf);
+    TEST_NAN_INF_ERROR(kParseErrorValueInvalid, "NInf", 1);
+    TEST_NAN_INF_ERROR(kParseErrorValueInvalid, "NaInf", 2);
+    TEST_NAN_INF_ERROR(kParseErrorValueInvalid, "INan", 1);
+    TEST_NAN_INF_ERROR(kParseErrorValueInvalid, "InNan", 2);
     TEST_NAN_INF_ERROR(kParseErrorValueInvalid, "nan", 1);
     TEST_NAN_INF_ERROR(kParseErrorValueInvalid, "-nan", 1);
     TEST_NAN_INF_ERROR(kParseErrorValueInvalid, "NAN", 1);
diff --git a/test/unittest/schematest.cpp b/test/unittest/schematest.cpp
index 6a8b685f4..9b99ba896 100644
--- a/test/unittest/schematest.cpp
+++ b/test/unittest/schematest.cpp
@@ -1281,6 +1281,12 @@ TEST(SchemaValidatingWriter, Simple) {
     EXPECT_TRUE(validator.GetInvalidDocumentPointer() == SchemaDocument::PointerType(""));
 }
 
+TEST(Schema, Issue848) { // no assertions: only constructs the objects below
+    rapidjson::Document d;
+    rapidjson::SchemaDocument s(d);
+    rapidjson::GenericSchemaValidator<rapidjson::SchemaDocument, rapidjson::Document> v(s); // Document is the second template argument
+}
+
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
 
 static SchemaDocument ReturnSchemaDocument() {
@@ -1308,6 +1314,40 @@ TEST(SchemaValidator, Issue608) {
     INVALIDATE(s, "{\"a\" : null, \"a\" : null}", "", "required", "");
 }
 
+// Fail to resolve $ref in allOf causes crash in SchemaValidator::StartObject()
+TEST(SchemaValidator, Issue728_AllOfRef) {
+    Document sd;
+    sd.Parse("{\"allOf\": [{\"$ref\": \"#/abc\"}]}");
+    SchemaDocument s(sd);
+    VALIDATE(s, "{\"key1\": \"abc\", \"key2\": \"def\"}", true);
+}
+
+TEST(SchemaValidator, Issue825) {
+    Document sd;
+    sd.Parse("{\"type\": \"object\", \"additionalProperties\": false, \"patternProperties\": {\"^i\": { \"type\": \"string\" } } }");
+    SchemaDocument s(sd);
+    VALIDATE(s, "{ \"item\": \"hello\" }", true);
+}
+
+TEST(SchemaValidator, Issue1017_allOfHandler) {
+    Document sd;
+    sd.Parse("{\"allOf\": [{\"type\": \"object\",\"properties\": {\"cyanArray2\": {\"type\": \"array\",\"items\": { \"type\": \"string\" }}}},{\"type\": \"object\",\"properties\": {\"blackArray\": {\"type\": \"array\",\"items\": { \"type\": \"string\" }}},\"required\": [ \"blackArray\" ]}]}");
+    SchemaDocument s(sd);
+    StringBuffer sb;
+    Writer<StringBuffer> writer(sb);
+    GenericSchemaValidator<SchemaDocument, Writer<StringBuffer> > validator(s, writer);
+    EXPECT_TRUE(validator.StartObject());
+    EXPECT_TRUE(validator.Key("cyanArray2", 10, false));
+    EXPECT_TRUE(validator.StartArray());    
+    EXPECT_TRUE(validator.EndArray(0));    
+    EXPECT_TRUE(validator.Key("blackArray", 10, false));
+    EXPECT_TRUE(validator.StartArray());    
+    EXPECT_TRUE(validator.EndArray(0));    
+    EXPECT_TRUE(validator.EndObject(0));
+    EXPECT_TRUE(validator.IsValid());
+    EXPECT_STREQ("{\"cyanArray2\":[],\"blackArray\":[]}", sb.GetString());
+}
+
 #ifdef __clang__
 RAPIDJSON_DIAG_POP
 #endif
diff --git a/test/unittest/simdtest.cpp b/test/unittest/simdtest.cpp
index b01b559f4..7b58cd05f 100644
--- a/test/unittest/simdtest.cpp
+++ b/test/unittest/simdtest.cpp
@@ -21,6 +21,8 @@
 #  define RAPIDJSON_SSE42
 #elif defined(__SSE2__)
 #  define RAPIDJSON_SSE2
+#elif defined(__ARM_NEON)
+#  define RAPIDJSON_NEON
 #endif
 
 #define RAPIDJSON_NAMESPACE rapidjson_simd
@@ -41,6 +43,8 @@ using namespace rapidjson_simd;
 #define SIMD_SUFFIX(name) name##_SSE2
 #elif defined(RAPIDJSON_SSE42)
 #define SIMD_SUFFIX(name) name##_SSE42
+#elif defined(RAPIDJSON_NEON)
+#define SIMD_SUFFIX(name) name##_NEON
 #else
 #define SIMD_SUFFIX(name) name
 #endif
diff --git a/test/unittest/unittest.h b/test/unittest/unittest.h
index e125bf88d..aa091aa56 100644
--- a/test/unittest/unittest.h
+++ b/test/unittest/unittest.h
@@ -78,7 +78,7 @@ inline Ch* StrDup(const Ch* str) {
 }
 
 inline FILE* TempFile(char *filename) {
-#ifdef _MSC_VER
+#if defined(__WIN32__) || defined(_MSC_VER)
     filename = tmpnam(filename);
 
     // For Visual Studio, tmpnam() adds a backslash in front. Remove it.
@@ -117,7 +117,7 @@ class AssertException : public std::logic_error {
 #pragma GCC diagnostic pop
 #endif
 
-#define RAPIDJSON_ASSERT(x) if (!(x)) throw AssertException(RAPIDJSON_STRINGIFY(x))
+#define RAPIDJSON_ASSERT(x) (!(x) ? throw AssertException(RAPIDJSON_STRINGIFY(x)) : (void)0u)
 
 class Random {
 public:
diff --git a/test/unittest/valuetest.cpp b/test/unittest/valuetest.cpp
index fefc001d4..307e1b06d 100644
--- a/test/unittest/valuetest.cpp
+++ b/test/unittest/valuetest.cpp
@@ -857,9 +857,46 @@ TEST(Value, String) {
 }
 
 // Issue 226: Value of string type should not point to NULL
-TEST(Value, SetStringNullException) {
-    Value v;
-    EXPECT_THROW(v.SetString(0, 0), AssertException);
+TEST(Value, SetStringNull) {
+    // Expectations: a null char pointer is accepted only with an explicit length of 0, and the Value then reads back as a non-null, empty string.
+    MemoryPoolAllocator<> allocator;
+    const char* nullPtr = 0;
+    {
+        // Construction with string type creates empty string
+        Value v(kStringType);
+        EXPECT_NE(v.GetString(), nullPtr); // non-null string returned
+        EXPECT_EQ(v.GetStringLength(), 0u);
+
+        // Construction from/setting to null without length not allowed
+        EXPECT_THROW(Value(StringRef(nullPtr)), AssertException);
+        EXPECT_THROW(Value(StringRef(nullPtr), allocator), AssertException);
+        EXPECT_THROW(v.SetString(nullPtr, allocator), AssertException);
+
+        // Non-empty length with null string is not allowed
+        EXPECT_THROW(v.SetString(nullPtr, 17u), AssertException);
+        EXPECT_THROW(v.SetString(nullPtr, 42u, allocator), AssertException);
+
+        // Setting to null string with empty length is allowed
+        v.SetString(nullPtr, 0u);
+        EXPECT_NE(v.GetString(), nullPtr); // non-null string returned
+        EXPECT_EQ(v.GetStringLength(), 0u);
+
+        v.SetNull();
+        v.SetString(nullPtr, 0u, allocator);
+        EXPECT_NE(v.GetString(), nullPtr); // non-null string returned
+        EXPECT_EQ(v.GetStringLength(), 0u);
+    }
+    // Construction with null string and empty length is allowed
+    {
+        Value v(nullPtr,0u);
+        EXPECT_NE(v.GetString(), nullPtr); // non-null string returned
+        EXPECT_EQ(v.GetStringLength(), 0u);
+    }
+    {
+        Value v(nullPtr, 0u, allocator);
+        EXPECT_NE(v.GetString(), nullPtr); // non-null string returned
+        EXPECT_EQ(v.GetStringLength(), 0u);
+    }
 }
 
 template <typename T, typename Allocator>
diff --git a/test/unittest/writertest.cpp b/test/unittest/writertest.cpp
index d346e0f3e..b190c6c28 100644
--- a/test/unittest/writertest.cpp
+++ b/test/unittest/writertest.cpp
@@ -100,6 +100,19 @@ TEST(Writer, String) {
 #endif
 }
 
+TEST(Writer, Issue_889) {
+    char buf[100] = "Hello";
+    // Test named after issue 889: String() is called with a mutable char array rather than a const char*.
+    StringBuffer buffer;
+    Writer<StringBuffer> writer(buffer);
+    writer.StartArray();
+    writer.String(buf);
+    writer.EndArray();
+    // buf holds 100 bytes, but only the five characters before the NUL are expected in the output.
+    EXPECT_STREQ("[\"Hello\"]", buffer.GetString());
+    EXPECT_TRUE(writer.IsComplete());
+}
+
 TEST(Writer, ScanWriteUnescapedString) {
     const char json[] = "[\" \\\"0123456789ABCDEF\"]";
     //                       ^ scanning stops here.
@@ -399,8 +412,10 @@ TEST(Writer, ValidateEncoding) {
         EXPECT_TRUE(writer.String("\xC2\xA2"));         // Cents sign U+00A2
         EXPECT_TRUE(writer.String("\xE2\x82\xAC"));     // Euro sign U+20AC
         EXPECT_TRUE(writer.String("\xF0\x9D\x84\x9E")); // G clef sign U+1D11E
+        EXPECT_TRUE(writer.String("\x01"));             // SOH control U+0001
+        EXPECT_TRUE(writer.String("\x1B"));             // Escape control U+001B
         writer.EndArray();
-        EXPECT_STREQ("[\"\x24\",\"\xC2\xA2\",\"\xE2\x82\xAC\",\"\xF0\x9D\x84\x9E\"]", buffer.GetString());
+        EXPECT_STREQ("[\"\x24\",\"\xC2\xA2\",\"\xE2\x82\xAC\",\"\xF0\x9D\x84\x9E\",\"\\u0001\",\"\\u001B\"]", buffer.GetString());
     }
 
     // Fail in decoding invalid UTF-8 sequence http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
@@ -442,6 +457,28 @@ TEST(Writer, InvalidEventSequence) {
         EXPECT_THROW(writer.Int(1), AssertException);
         EXPECT_FALSE(writer.IsComplete());
     }
+
+    // { 'a' } -- ending the object right after a key, with no value written, must assert
+    {
+        StringBuffer buffer;
+        Writer<StringBuffer> writer(buffer);
+        writer.StartObject();
+        writer.Key("a");
+        EXPECT_THROW(writer.EndObject(), AssertException);
+        EXPECT_FALSE(writer.IsComplete());
+    }
+
+    // { 'a':'b','c' } -- same check when a complete key/value pair precedes the dangling key
+    {
+        StringBuffer buffer;
+        Writer<StringBuffer> writer(buffer);
+        writer.StartObject();
+        writer.Key("a");
+        writer.String("b");
+        writer.Key("c");
+        EXPECT_THROW(writer.EndObject(), AssertException);
+        EXPECT_FALSE(writer.IsComplete());
+    }
 }
 
 TEST(Writer, NaN) {
diff --git a/test/valgrind.supp b/test/valgrind.supp
new file mode 100644
index 000000000..1fed18bea
--- /dev/null
+++ b/test/valgrind.supp
@@ -0,0 +1,17 @@
+{
+    Suppress wcslen valgrind report 1
+    Memcheck:Cond
+    fun:__wcslen_sse2
+}
+
+{
+    Suppress wcslen valgrind report 2
+    Memcheck:Addr8
+    fun:__wcslen_sse2
+}
+
+{
+    Suppress wcslen valgrind report 3
+    Memcheck:Value8
+    fun:__wcslen_sse2
+}
diff --git a/travis-doxygen.sh b/travis-doxygen.sh
index 31a50cfa9..e5c03206a 100755
--- a/travis-doxygen.sh
+++ b/travis-doxygen.sh
@@ -4,10 +4,9 @@
 
 set -e
 
-DOXYGEN_VER=doxygen-1.8.7
+DOXYGEN_VER=doxygen-1.8.13
 DOXYGEN_TAR=${DOXYGEN_VER}.linux.bin.tar.gz
 DOXYGEN_URL="http://ftp.stack.nl/pub/users/dimitri/${DOXYGEN_TAR}"
-DOXYGEN_BIN="/usr/local/bin/doxygen"
 
 : ${GITHUB_REPO:="miloyip/rapidjson"}
 GITHUB_HOST="github.com"
@@ -66,7 +65,7 @@ gh_pages_prepare()
 	[ ! -d "html" ] || \
 		abort "Doxygen target directory already exists."
 	git --version
-	git clone -b gh-pages "${GITHUB_CLONE}" html
+	git clone --single-branch -b gh-pages "${GITHUB_CLONE}" html
 	cd html
 	# setup git config (with defaults)
 	git config user.name "${GIT_NAME-travis}"