Skip to content

Commit

Permalink
Fixed decoding of huge JSON data for okio streams
Browse files Browse the repository at this point in the history
Fixes #2006

Co-authored-by: Leonid Startsev <sandwwraith@users.noreply.github.com>
  • Loading branch information
shanshin and sandwwraith committed Aug 15, 2022
1 parent 83b6e33 commit 0c0648e
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 23 deletions.
Expand Up @@ -50,7 +50,7 @@ internal class OkioSerialReader(private val source: BufferedSource): SerialReade
override fun read(buffer: CharArray, bufferOffset: Int, count: Int): Int {
var i = 0
while (i < count && !source.exhausted()) {
buffer[i] = source.readUtf8CodePoint().toChar()
buffer[bufferOffset + i] = source.readUtf8CodePoint().toChar()
i++
}
return if (i > 0) i else -1
Expand Down
@@ -0,0 +1,40 @@
/*
* Copyright 2017-2022 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
*/

package kotlinx.serialization.json

import kotlinx.serialization.Serializable
import kotlin.test.Test

/**
 * Checks that a huge object graph can be encoded and restored through [assertJsonFormAndRestored].
 */
class JsonHugeDataSerializationTest : JsonTestBase() {

    /** Minimal recursive tree node: it carries nothing but its children. */
    @Serializable
    private data class Node(
        val children: List<Node>
    )

    /**
     * Builds [count] sibling nodes. Every sibling heads a single-child chain, so each branch
     * contains [depth] nodes in total. A [depth] of 0 yields an empty list.
     */
    private fun createNodes(count: Int, depth: Int): List<Node> {
        val nodes = mutableListOf<Node>()
        if (depth != 0) {
            repeat(count) {
                nodes += Node(createNodes(1, depth - 1))
            }
        }
        return nodes
    }

    @Test
    fun test() {
        // A huge instance: 1000 children under the root, each heading a single-child chain of 10 nodes.
        val root = Node(createNodes(1000, 10))
        val serializer = Node.serializer()

        val expected = Json.encodeToString(serializer, root)

        /*
        When checking the encoding for `JsonTestingMode.STREAMING`, assertJsonFormAndRestored calls
        Json.encodeToString(...) - the very function that produced `expected` above. The encoding check
        for `STREAMING` therefore always passes, even if that mode were actually broken.
        Only TREE, JAVA_STREAMS and OKIO are really being tested here.
        */
        assertJsonFormAndRestored(serializer, root, expected)
    }
}
Expand Up @@ -14,23 +14,31 @@ private const val DEFAULT_THRESHOLD = 128
* For some reason this hand-rolled implementation is faster than
* fun ArrayAsSequence(s: CharArray): CharSequence = java.nio.CharBuffer.wrap(s, 0, length)
*/
// NOTE(review): this hunk shows the class header, `length`, `get` and the `subSequence` return twice -
// presumably the removed (`source`-based) and added (`buffer`-based) lines of the same diff.
// Confirm against the repository before reading it as a single class body.
//
// CharSequence view over a CharArray whose visible length can be shortened via trim().
private class ArrayAsSequence(private val source: CharArray) : CharSequence {
override val length: Int = source.size
internal class ArrayAsSequence(val buffer: CharArray) : CharSequence {
// Declared as `var` so that trim() can shrink the visible length while keeping the same array.
override var length: Int = buffer.size

// Reads straight from the backing array; the index is not compared against `length`.
override fun get(index: Int): Char = source[index]
override fun get(index: Int): Char = buffer[index]

// Builds a String from the chars in [startIndex, endIndex).
// The `buffer` variant clamps endIndex to `length`, so chars beyond the trimmed length are never included.
override fun subSequence(startIndex: Int, endIndex: Int): CharSequence {
return source.concatToString(startIndex, endIndex)
return buffer.concatToString(startIndex, minOf(endIndex, length))
}

// Same extraction as subSequence, but typed as String for callers that need one.
fun substring(startIndex: Int, endIndex: Int): String {
return buffer.concatToString(startIndex, minOf(endIndex, length))
}

// Sets the visible length to newSize, capped at the backing array's size.
// The array itself is left untouched, and newSize is not checked for being negative.
fun trim(newSize: Int) {
length = minOf(buffer.size, newSize)
}
}

internal class ReaderJsonLexer(
private val reader: SerialReader,
private var _source: CharArray = CharArray(BATCH_SIZE)
charsBuffer: CharArray = CharArray(BATCH_SIZE)
) : AbstractJsonLexer() {
private var threshold: Int = DEFAULT_THRESHOLD // chars

override var source: CharSequence = ArrayAsSequence(_source)
override val source: ArrayAsSequence = ArrayAsSequence(charsBuffer)

init {
preload(0)
Expand Down Expand Up @@ -65,22 +73,22 @@ internal class ReaderJsonLexer(
return false
}

// NOTE(review): this hunk appears to interleave the pre-change body (spaceLeft / _source / read)
// with the post-change body (unprocessedCount / source.buffer / filledCount) of the same diff.
// Confirm against the repository before treating it as one function.
//
// Moves the not-yet-consumed chars (starting at currentPosition) to the front of the buffer,
// then keeps calling reader.read until the buffer is full or the reader returns -1.
private fun preload(spaceLeft: Int) {
val buffer = _source
buffer.copyInto(buffer, 0, currentPosition, currentPosition + spaceLeft)
var read = spaceLeft
val sizeTotal = _source.size
while (read != sizeTotal) {
val actual = reader.read(buffer, read, sizeTotal - read)
private fun preload(unprocessedCount: Int) {
val buffer = source.buffer
// The copy is skipped when nothing is left over to carry to the front.
if (unprocessedCount != 0) {
buffer.copyInto(buffer, 0, currentPosition, currentPosition + unprocessedCount)
}
// Number of valid chars at the start of the buffer; the next read appends after them.
var filledCount = unprocessedCount
val sizeTotal = source.length
while (filledCount != sizeTotal) {
// Asks the reader for up to (sizeTotal - filledCount) chars, written at offset filledCount.
val actual = reader.read(buffer, filledCount, sizeTotal - filledCount)
if (actual == -1) {
// EOF, resizing the array so it matches input size
// Can also be done by extracting source.length to a separate var
_source = _source.copyOf(read)
source = ArrayAsSequence(_source)
// The `trim` variant shortens only the visible length and keeps the same backing array.
source.trim(filledCount)
// presumably makes ensureHaveChars return early from now on (its check is `spaceLeft > threshold`) - confirm
threshold = -1
break
}
read += actual
filledCount += actual
}
// The carried-over chars now start at index 0.
currentPosition = 0
}
Expand Down Expand Up @@ -115,7 +123,7 @@ internal class ReaderJsonLexer(

override fun ensureHaveChars() {
val cur = currentPosition
val oldSize = _source.size
val oldSize = source.length
val spaceLeft = oldSize - cur
if (spaceLeft > threshold) return
// warning: current position is not updated during string consumption
Expand Down Expand Up @@ -152,19 +160,19 @@ internal class ReaderJsonLexer(
}

// NOTE(review): the `val src` / `for` lines appear twice - presumably the removed (`_source`, `size`)
// and added (`source`, `length`) lines of the diff; confirm against the repository.
//
// Linear search for `char` starting at startPos.
// Returns the index of the first match, or -1 when no match is found before the end of `src`.
override fun indexOf(char: Char, startPos: Int): Int {
val src = _source
for (i in startPos until src.size) {
val src = source
// Upper bound is the sequence's `length`, which trim() may have set below the array size.
for (i in startPos until src.length) {
if (src[i] == char) return i
}
return -1
}

// NOTE(review): two return lines are shown - presumably the removed (`_source`) and added (`source`)
// lines of the diff; confirm against the repository.
//
// Returns the chars in [startPos, endPos) as a String.
// The `source.substring` variant additionally clamps endPos to the sequence's current length.
override fun substring(startPos: Int, endPos: Int): String {
return _source.concatToString(startPos, endPos)
return source.substring(startPos, endPos)
}

// NOTE(review): two call lines are shown - presumably the removed (`_source`) and added (`source.buffer`)
// lines of the diff; confirm against the repository.
//
// Appends chars from the backing CharArray, between fromIndex and toIndex, to escapedString.
// Unlike substring, toIndex is passed through without being clamped to the sequence's length.
// NOTE(review): presumably toIndex is exclusive, as in StringBuilder.appendRange - confirm the type of escapedString.
override fun appendRange(fromIndex: Int, toIndex: Int) {
escapedString.appendRange(_source, fromIndex, toIndex)
escapedString.appendRange(source.buffer, fromIndex, toIndex)
}

// Can be carefully implemented but postponed for now
Expand Down

0 comments on commit 0c0648e

Please sign in to comment.