Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed decoding of huge JSON data for okio streams #2007

Merged
merged 7 commits into from Aug 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -50,7 +50,7 @@ internal class OkioSerialReader(private val source: BufferedSource): SerialReade
override fun read(buffer: CharArray, bufferOffset: Int, count: Int): Int {
var i = 0
while (i < count && !source.exhausted()) {
buffer[i] = source.readUtf8CodePoint().toChar()
buffer[bufferOffset + i] = source.readUtf8CodePoint().toChar()
i++
}
return if (i > 0) i else -1
Expand Down
@@ -0,0 +1,40 @@
/*
* Copyright 2017-2022 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
*/

package kotlinx.serialization.json

import kotlinx.serialization.Serializable
import kotlin.test.Test

class JsonHugeDataSerializationTest : JsonTestBase() {

    @Serializable
    private data class Node(
        val children: List<Node>
    )

    // Builds `count` sibling nodes, each the head of a single-child chain
    // `depth` levels deep; a depth of zero produces no nodes at all.
    private fun createNodes(count: Int, depth: Int): List<Node> =
        if (depth == 0) emptyList()
        else List(count) { Node(createNodes(1, depth - 1)) }

    @Test
    fun test() {
        // Deliberately large tree (1000 chains, each 10 levels deep) so the
        // serialized JSON is far bigger than any internal read buffer.
        val rootNode = Node(createNodes(1000, 10))

        val expectedJson = Json.encodeToString(Node.serializer(), rootNode)

        /*
         When verifying the encoding step, assertJsonFormAndRestored itself calls
         Json.encodeToString(...) for JsonTestingMode.STREAMING. Because
         `expectedJson` was produced by that very function, the STREAMING
         comparison is trivially satisfied even if streaming encoding were
         broken; only the TREE, JAVA_STREAMS and OKIO modes are genuinely
         exercised here.
        */
        assertJsonFormAndRestored(Node.serializer(), rootNode, expectedJson)
    }
}
Expand Up @@ -14,23 +14,31 @@ private const val DEFAULT_THRESHOLD = 128
* For some reason this hand-rolled implementation is faster than
* fun ArrayAsSequence(s: CharArray): CharSequence = java.nio.CharBuffer.wrap(s, 0, length)
*/
private class ArrayAsSequence(private val source: CharArray) : CharSequence {
override val length: Int = source.size
internal class ArrayAsSequence(val buffer: CharArray) : CharSequence {
override var length: Int = buffer.size

override fun get(index: Int): Char = source[index]
override fun get(index: Int): Char = buffer[index]

override fun subSequence(startIndex: Int, endIndex: Int): CharSequence {
return source.concatToString(startIndex, endIndex)
return buffer.concatToString(startIndex, minOf(endIndex, length))
}

fun substring(startIndex: Int, endIndex: Int): String {
return buffer.concatToString(startIndex, minOf(endIndex, length))
}

fun trim(newSize: Int) {
length = minOf(buffer.size, newSize)
}
}

internal class ReaderJsonLexer(
private val reader: SerialReader,
private var _source: CharArray = CharArray(BATCH_SIZE)
charsBuffer: CharArray = CharArray(BATCH_SIZE)
) : AbstractJsonLexer() {
private var threshold: Int = DEFAULT_THRESHOLD // chars

override var source: CharSequence = ArrayAsSequence(_source)
override val source: ArrayAsSequence = ArrayAsSequence(charsBuffer)

init {
preload(0)
Expand Down Expand Up @@ -65,22 +73,22 @@ internal class ReaderJsonLexer(
return false
}

private fun preload(spaceLeft: Int) {
val buffer = _source
buffer.copyInto(buffer, 0, currentPosition, currentPosition + spaceLeft)
var read = spaceLeft
val sizeTotal = _source.size
while (read != sizeTotal) {
val actual = reader.read(buffer, read, sizeTotal - read)
private fun preload(unprocessedCount: Int) {
val buffer = source.buffer
if (unprocessedCount != 0) {
buffer.copyInto(buffer, 0, currentPosition, currentPosition + unprocessedCount)
}
var filledCount = unprocessedCount
val sizeTotal = source.length
while (filledCount != sizeTotal) {
val actual = reader.read(buffer, filledCount, sizeTotal - filledCount)
if (actual == -1) {
// EOF, resizing the array so it matches input size
// Can also be done by extracting source.length to a separate var
_source = _source.copyOf(read)
source = ArrayAsSequence(_source)
source.trim(filledCount)
threshold = -1
break
}
read += actual
filledCount += actual
}
currentPosition = 0
}
Expand Down Expand Up @@ -115,7 +123,7 @@ internal class ReaderJsonLexer(

override fun ensureHaveChars() {
val cur = currentPosition
val oldSize = _source.size
val oldSize = source.length
val spaceLeft = oldSize - cur
if (spaceLeft > threshold) return
// warning: current position is not updated during string consumption
Expand Down Expand Up @@ -152,19 +160,19 @@ internal class ReaderJsonLexer(
}

override fun indexOf(char: Char, startPos: Int): Int {
val src = _source
for (i in startPos until src.size) {
val src = source
for (i in startPos until src.length) {
if (src[i] == char) return i
}
return -1
}

override fun substring(startPos: Int, endPos: Int): String {
return _source.concatToString(startPos, endPos)
return source.substring(startPos, endPos)
}

override fun appendRange(fromIndex: Int, toIndex: Int) {
escapedString.appendRange(_source, fromIndex, toIndex)
escapedString.appendRange(source.buffer, fromIndex, toIndex)
}

// Can be carefully implemented but postponed for now
Expand Down