/
JsonLexer.kt
174 lines (154 loc) · 5.86 KB
/
JsonLexer.kt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
/*
* Copyright 2017-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
*/
@file:Suppress("INVISIBLE_REFERENCE", "INVISIBLE_MEMBER")
package kotlinx.serialization.json.internal
/**
 * Default length, in chars, of the buffer that [ReaderJsonLexer] allocates when the caller does not pass one.
 */
@PublishedApi
internal const val BATCH_SIZE: Int = 16 * 1024
/**
 * Initial value of the refill threshold of [ReaderJsonLexer], in chars: the buffer is reloaded
 * once the number of chars between the current position and the end of the buffer is not greater than this.
 */
private const val DEFAULT_THRESHOLD = 128
/**
 * Read-only [CharSequence] view over a [CharArray].
 *
 * Indexed reads go straight to the backing array; only [subSequence] allocates, by building a string
 * from the requested range.
 *
 * This hand-rolled wrapper is kept because, for some reason, it is faster than
 * `fun ArrayAsSequence(s: CharArray): CharSequence = java.nio.CharBuffer.wrap(s, 0, length)`.
 */
private class ArrayAsSequence(private val source: CharArray) : CharSequence {
    // Taken from the array once, at construction time.
    override val length: Int = source.size

    override fun get(index: Int): Char {
        return source[index]
    }

    override fun subSequence(startIndex: Int, endIndex: Int): CharSequence =
        source.concatToString(startIndex, endIndex)
}
/**
 * JSON lexer that pulls its input from a [SerialReader] in chunks instead of holding the whole document.
 *
 * Chars are read into the `_source` array and exposed to the base lexer through [source]. When the number of
 * unconsumed chars drops to `threshold` or below, [ensureHaveChars] moves them to the front of the array and
 * refills the rest from [reader]; after such a refill `currentPosition` is 0.
 *
 * @param reader supplier of the input chars; a return value of -1 from `read` is treated as end of input.
 * @param _source backing buffer, [BATCH_SIZE] chars long by default.
 */
internal class ReaderJsonLexer(
private val reader: SerialReader,
private var _source: CharArray = CharArray(BATCH_SIZE)
) : AbstractJsonLexer() {
// Refill trigger used by ensureHaveChars; preload sets it to -1 once the reader reports end of input.
private var threshold: Int = DEFAULT_THRESHOLD // chars
// CharSequence view over _source; replaced together with _source when the buffer is shrunk at end of input.
override var source: CharSequence = ArrayAsSequence(_source)
// Fill the buffer before any token is requested.
init {
preload(0)
}
/**
 * Consumes a ',' if it is the char at the position returned by `skipWhitespaces()`.
 *
 * @return `true` if a comma was found (and `currentPosition` was incremented), `false` otherwise,
 * including when `skipWhitespaces()` returns -1 or a position at/after the end of [source].
 */
override fun tryConsumeComma(): Boolean {
val current = skipWhitespaces()
if (current >= source.length || current == -1) return false
if (source[current] == ',') {
// NOTE(review): advances currentPosition rather than `current`; presumably skipWhitespaces() has already
// moved currentPosition to `current` — confirm in AbstractJsonLexer.
++currentPosition
return true
}
return false
}
/**
 * Skips spaces, line feeds, carriage returns and tabs, leaving `currentPosition` on the first other char.
 *
 * @return the result of `isValidValueStart` for that char, or `false` if the input ends first.
 */
override fun canConsumeValue(): Boolean {
ensureHaveChars()
var current = currentPosition
while (true) {
current = prefetchOrEof(current)
if (current == -1) break // could be inline function but KT-1436
val c = source[current]
// Inlined skipWhitespaces without field spill and nested loop. Also faster than char2TokenClass
if (c == ' ' || c == '\n' || c == '\r' || c == '\t') {
++current
continue
}
currentPosition = current
return isValidValueStart(c)
}
// End of input: `current` is -1 here, and that value is stored as the position.
currentPosition = current
return false
}
/**
 * Refills the buffer from [reader].
 *
 * The [spaceLeft] chars starting at `currentPosition` are first moved to the front of the buffer; the reader
 * is then polled until the buffer is full or it returns -1. On -1 the buffer is cut down to the chars actually
 * held and `threshold` is set to -1. `currentPosition` is reset to 0 in every case.
 *
 * @param spaceLeft number of not yet consumed chars to keep, counted from `currentPosition`.
 */
private fun preload(spaceLeft: Int) {
val buffer = _source
if (spaceLeft != 0) {
// Keep the unconsumed tail by moving it to index 0 of the same array.
buffer.copyInto(buffer, 0, currentPosition, currentPosition + spaceLeft)
}
// Number of valid chars currently held at the front of the buffer.
var read = spaceLeft
val sizeTotal = _source.size
while (read != sizeTotal) {
val actual = reader.read(buffer, read, sizeTotal - read)
if (actual == -1) {
// EOF, resizing the array so it matches input size
// Can also be done by extracting source.length to a separate var
_source = _source.copyOf(read)
source = ArrayAsSequence(_source)
// With -1 as threshold, ensureHaveChars returns early for any non-negative remainder.
threshold = -1
break
}
read += actual
}
currentPosition = 0
}
/**
 * Returns a position at which a char can be read, asking for a buffer refill if [position] is past its end.
 *
 * @return [position] if it is inside [source]; 0 if `currentPosition` is 0 after [ensureHaveChars] and
 * [source] is not empty; -1 otherwise.
 */
override fun prefetchOrEof(position: Int): Int {
if (position < source.length) return position
// Publish the position so that ensureHaveChars computes the remainder from it.
currentPosition = position
ensureHaveChars()
if (currentPosition != 0 || source.isEmpty()) return -1 // if something was loaded, then it would be zero.
return 0
}
/**
 * Consumes chars until one whose token class is not `TC_WHITESPACE` and returns that class.
 *
 * @return the token class of the consumed char, with `currentPosition` just past it, or `TC_EOF` if
 * [prefetchOrEof] reports the end of input first.
 */
override fun consumeNextToken(): Byte {
ensureHaveChars()
// Local snapshot of the property. NOTE(review): preload may re-wrap `source` at end of input while this loop
// runs; the snapshot then still reads the pre-shrink array, which should hold the same chars at the indices
// used here because the new array is a copyOf of it — confirm.
val source = source
var cpos = currentPosition
while (true) {
cpos = prefetchOrEof(cpos)
if (cpos == -1) break
val ch = source[cpos++]
return when (val tc = charToTokenClass(ch)) {
TC_WHITESPACE -> continue
else -> {
currentPosition = cpos
tc
}
}
}
currentPosition = cpos
return TC_EOF
}
/**
 * Reloads the buffer when the number of chars from `currentPosition` to the end of `_source`
 * is not greater than `threshold`; otherwise does nothing.
 */
override fun ensureHaveChars() {
val cur = currentPosition
val oldSize = _source.size
val spaceLeft = oldSize - cur
if (spaceLeft > threshold) return
// warning: current position is not updated during string consumption
// resizing
preload(spaceLeft)
}
/**
 * Reads a string, taking a fast path when the closing quote is already buffered and no escape char precedes it.
 *
 * @return the chars between the position reached after `consumeNextToken(STRING)` and the closing quote when
 * the fast path applies; otherwise the result of `consumeString`. Calls `fail(TC_STRING)` when no closing quote
 * is buffered and [prefetchOrEof] returns -1.
 */
override fun consumeKeyString(): String {
/*
* For strings we assume that escaped symbols are rather an exception, so firstly
* we optimistically scan for closing quote via intrinsified and blazing-fast 'indexOf',
* then do our pessimistic check for backslash and fallback to slow-path if necessary.
*/
consumeNextToken(STRING)
var current = currentPosition
val closingQuote = indexOf('"', current)
if (closingQuote == -1) {
// No quote among the buffered chars: either the input is exhausted or the string crosses the buffer end.
current = prefetchOrEof(current)
if (current == -1) fail(TC_STRING)
// it's also possible just to resize buffer,
// instead of falling back to slow path,
// not sure what is better
else return consumeString(source, currentPosition, current)
}
// Now we _optimistically_ know where the string ends (it might have been an escaped quote)
for (i in current until closingQuote) {
// Encountered escape sequence, should fallback to "slow" path and symbolic scanning
if (source[i] == STRING_ESC) {
return consumeString(source, currentPosition, i)
}
}
// Fast path: step over the closing quote and return the raw chars in between.
this.currentPosition = closingQuote + 1
return substring(current, closingQuote)
}
/**
 * Linear search for [char] in `_source`, from [startPos] to the end of the array.
 *
 * @return the index of the first match, or -1 if there is none in the currently buffered chars.
 */
override fun indexOf(char: Char, startPos: Int): Int {
val src = _source
for (i in startPos until src.size) {
if (src[i] == char) return i
}
return -1
}
/** Builds a string from the chars of `_source` in the range from [startPos] (inclusive) to [endPos] (exclusive). */
override fun substring(startPos: Int, endPos: Int): String {
return _source.concatToString(startPos, endPos)
}
/** Appends the chars of `_source` between [fromIndex] and [toIndex] to `escapedString`. */
override fun appendRange(fromIndex: Int, toIndex: Int) {
escapedString.appendRange(_source, fromIndex, toIndex)
}
// Can be carefully implemented but postponed for now
// Until then this lexer always returns null here, whatever the arguments.
override fun consumeLeadingMatchingValue(keyToMatch: String, isLenient: Boolean): String? = null
}