diff --git a/formats/json-okio/commonMain/src/kotlinx/serialization/json/okio/internal/OkioJsonStreams.kt b/formats/json-okio/commonMain/src/kotlinx/serialization/json/okio/internal/OkioJsonStreams.kt
index 2d5485c16..ae8de4719 100644
--- a/formats/json-okio/commonMain/src/kotlinx/serialization/json/okio/internal/OkioJsonStreams.kt
+++ b/formats/json-okio/commonMain/src/kotlinx/serialization/json/okio/internal/OkioJsonStreams.kt
@@ -50,7 +50,7 @@ internal class OkioSerialReader(private val source: BufferedSource): SerialReade
     override fun read(buffer: CharArray, bufferOffset: Int, count: Int): Int {
         var i = 0
         while (i < count && !source.exhausted()) {
-            buffer[i] = source.readUtf8CodePoint().toChar()
+            buffer[bufferOffset + i] = source.readUtf8CodePoint().toChar()
             i++
         }
         return if (i > 0) i else -1
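The one-line fix above restores the `bufferOffset` contract: the old code always wrote decoded chars from index 0, clobbering earlier data whenever the caller asked it to append into a partially filled buffer. A minimal sketch of a refill loop in the style of `ReaderJsonLexer.preload()` (the `fillTail` helper is hypothetical; only the `SerialReader.read` contract visible in this patch is assumed):

// Hypothetical refill loop: `filled` chars at the start of `buffer` are still
// valid, so the reader must append at `filled`, not at 0. With the old
// `buffer[i] = ...` each iteration overwrote the chars delivered by the previous one.
fun fillTail(reader: SerialReader, buffer: CharArray): Int {
    var filled = 0
    while (filled != buffer.size) {
        val actual = reader.read(buffer, filled, buffer.size - filled)
        if (actual == -1) break // EOF: only the first `filled` chars are valid
        filled += actual
    }
    return filled
}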
diff --git a/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonHugeDataSerializationTest.kt b/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonHugeDataSerializationTest.kt
new file mode 100644
index 000000000..0a633268a
--- /dev/null
+++ b/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonHugeDataSerializationTest.kt
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2017-2022 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
+ */
+
+package kotlinx.serialization.json
+
+import kotlinx.serialization.Serializable
+import kotlin.test.Test
+
+class JsonHugeDataSerializationTest : JsonTestBase() {
+
+    @Serializable
+    private data class Node(
+        val children: List<Node>
+    )
+
+    private fun createNodes(count: Int, depth: Int): List<Node> {
+        val ret = mutableListOf<Node>()
+        if (depth == 0) return ret
+        for (i in 0 until count) {
+            ret.add(Node(createNodes(1, depth - 1)))
+        }
+        return ret
+    }
+
+    @Test
+    fun test() {
+        // create some huge instance
+        val rootNode = Node(createNodes(1000, 10))
+
+        val expectedJson = Json.encodeToString(Node.serializer(), rootNode)
+
+        /*
+        When checking the encoding, assertJsonFormAndRestored calls Json.encodeToString(...) for `JsonTestingMode.STREAMING`.
+        Since the string `expectedJson` was generated by that same function, the test will always consider the STREAMING
+        encoding correct even if it is actually broken, so only TREE, JAVA_STREAMS and OKIO are actually tested here.
+        */
+        assertJsonFormAndRestored(Node.serializer(), rootNode, expectedJson)
+    }
+}
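As a standalone illustration of the data this test generates (with a hypothetical `TinyNode` standing in for the test's private `Node`), the recursive structure encodes to nested `children` arrays; at count = 1000 and depth = 10 the resulting string is far larger than the lexer's batch buffer, forcing multiple refills during decoding:

import kotlinx.serialization.Serializable
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json

@Serializable
data class TinyNode(val children: List<TinyNode>)

fun main() {
    // Same recursive shape as the test's Node, three levels deep:
    val tiny = TinyNode(listOf(TinyNode(listOf(TinyNode(emptyList())))))
    // Prints: {"children":[{"children":[{"children":[]}]}]}
    println(Json.encodeToString(tiny))
}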
diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/JsonLexer.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/JsonLexer.kt
index e02364ee4..83483eac4 100644
--- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/JsonLexer.kt
+++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/JsonLexer.kt
@@ -14,23 +14,31 @@ private const val DEFAULT_THRESHOLD = 128
  * For some reason this hand-rolled implementation is faster than
  * fun ArrayAsSequence(s: CharArray): CharSequence = java.nio.CharBuffer.wrap(s, 0, length)
  */
-private class ArrayAsSequence(private val source: CharArray) : CharSequence {
-    override val length: Int = source.size
+internal class ArrayAsSequence(val buffer: CharArray) : CharSequence {
+    override var length: Int = buffer.size
 
-    override fun get(index: Int): Char = source[index]
+    override fun get(index: Int): Char = buffer[index]
 
     override fun subSequence(startIndex: Int, endIndex: Int): CharSequence {
-        return source.concatToString(startIndex, endIndex)
+        return buffer.concatToString(startIndex, minOf(endIndex, length))
+    }
+
+    fun substring(startIndex: Int, endIndex: Int): String {
+        return buffer.concatToString(startIndex, minOf(endIndex, length))
+    }
+
+    fun trim(newSize: Int) {
+        length = minOf(buffer.size, newSize)
     }
 }
 
 internal class ReaderJsonLexer(
     private val reader: SerialReader,
-    private var _source: CharArray = CharArray(BATCH_SIZE)
+    charsBuffer: CharArray = CharArray(BATCH_SIZE)
 ) : AbstractJsonLexer() {
     private var threshold: Int = DEFAULT_THRESHOLD // chars
 
-    override var source: CharSequence = ArrayAsSequence(_source)
+    override val source: ArrayAsSequence = ArrayAsSequence(charsBuffer)
 
     init {
         preload(0)
@@ -65,22 +73,22 @@ internal class ReaderJsonLexer(
         return false
     }
 
-    private fun preload(spaceLeft: Int) {
-        val buffer = _source
-        buffer.copyInto(buffer, 0, currentPosition, currentPosition + spaceLeft)
-        var read = spaceLeft
-        val sizeTotal = _source.size
-        while (read != sizeTotal) {
-            val actual = reader.read(buffer, read, sizeTotal - read)
+    private fun preload(unprocessedCount: Int) {
+        val buffer = source.buffer
+        if (unprocessedCount != 0) {
+            buffer.copyInto(buffer, 0, currentPosition, currentPosition + unprocessedCount)
+        }
+        var filledCount = unprocessedCount
+        val sizeTotal = source.length
+        while (filledCount != sizeTotal) {
+            val actual = reader.read(buffer, filledCount, sizeTotal - filledCount)
             if (actual == -1) {
                 // EOF, resizing the array so it matches input size
-                // Can also be done by extracting source.length to a separate var
-                _source = _source.copyOf(read)
-                source = ArrayAsSequence(_source)
+                source.trim(filledCount)
                 threshold = -1
                 break
             }
-            read += actual
+            filledCount += actual
         }
         currentPosition = 0
     }
@@ -115,7 +123,7 @@ internal class ReaderJsonLexer(
     override fun ensureHaveChars() {
         val cur = currentPosition
-        val oldSize = _source.size
+        val oldSize = source.length
         val spaceLeft = oldSize - cur
         if (spaceLeft > threshold) return
         // warning: current position is not updated during string consumption
@@ -152,19 +160,19 @@ internal class ReaderJsonLexer(
     }
 
     override fun indexOf(char: Char, startPos: Int): Int {
-        val src = _source
-        for (i in startPos until src.size) {
+        val src = source
+        for (i in startPos until src.length) {
             if (src[i] == char) return i
         }
         return -1
     }
 
     override fun substring(startPos: Int, endPos: Int): String {
-        return _source.concatToString(startPos, endPos)
+        return source.substring(startPos, endPos)
     }
 
     override fun appendRange(fromIndex: Int, toIndex: Int) {
-        escapedString.appendRange(_source, fromIndex, toIndex)
+        escapedString.appendRange(source.buffer, fromIndex, toIndex)
     }
 
     // Can be carefully implemented but postponed for now
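The `ArrayAsSequence` rework above replaces the old reallocate-on-EOF path (`_source.copyOf(read)` plus a fresh wrapper object) with a mutable logical length: `trim(filledCount)` marks the valid prefix without any allocation, which in turn means every consumer must bound itself by `source.length` rather than the raw array size. A condensed sketch of that invariant, with a hypothetical `TrimmedBuffer` standing in for `ArrayAsSequence`:

// After an EOF trim, length <= buffer.size, and chars past `length` are stale
// leftovers from the previous batch. Scans and substrings must clamp to `length`.
class TrimmedBuffer(val buffer: CharArray) : CharSequence {
    override var length: Int = buffer.size
    override fun get(index: Int): Char = buffer[index]
    override fun subSequence(startIndex: Int, endIndex: Int): CharSequence =
        buffer.concatToString(startIndex, minOf(endIndex, length))

    // Called once when reader.read() returns -1: no copyOf(), no new wrapper.
    fun trim(newSize: Int) {
        length = minOf(buffer.size, newSize)
    }
}

fun indexOfSketch(src: TrimmedBuffer, char: Char, startPos: Int): Int {
    // Bounding by src.buffer.size instead of src.length would scan stale chars.
    for (i in startPos until src.length) {
        if (src[i] == char) return i
    }
    return -1
}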