Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for better handling with large arrays #402

Merged
merged 6 commits into from
Apr 17, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
43 changes: 43 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ compile-json-stringify date format x 1,086,187 ops/sec ±0.16% (99 runs sampled)
- <a href="#long">`Long integers`</a>
- <a href="#integer">`Integers`</a>
- <a href="#nullable">`Nullable`</a>
- <a href="#largearrays">`Large Arrays`</a>
- <a href="#security">`Security Notice`</a>
- <a href="#acknowledgements">`Acknowledgements`</a>
- <a href="#license">`License`</a>
Expand Down Expand Up @@ -117,6 +118,7 @@ const stringify = fastJson(mySchema, {
- `schema`: external schemas references by $ref property. [More details](#ref)
- `ajv`: [ajv v8 instance's settings](https://ajv.js.org/options.html) for those properties that require `ajv`. [More details](#anyof)
- `rounding`: setup how the `integer` types will be rounded when not integers. [More details](#integer)
- `largeArrayMechanism`: sets the mechanism that should be used to handle large arrays (`20000` items or more). [More details](#largearrays)


<a name="api"></a>
Expand Down Expand Up @@ -582,6 +584,47 @@ Otherwise, instead of raising an error, null values will be coerced as follows:
- `string` -> `""`
- `boolean` -> `false`

<a name="largearrays"></a>
#### Large Arrays

Large arrays are, for the scope of this document, defined as arrays containing
`20000` items or more.

At some point the overhead caused by the default mechanism used by
`fast-json-stringify` to handle arrays starts increasing exponentially, leading
to slow overall executions.

In order to improve that, the user can set the `largeArrayMechanism` option to
one of the following values:

- `default` - Default behavior
- `json-stringify` - This option will remove support for schema validation
within **large arrays** completely. By doing so the overhead previously
mentioned is nulled, greatly improving execution time. Mind there's no change
in behavior for arrays with fewer than `20000` items
- `array-join` - This option is a compromise between the last two.
RafaelGSS marked this conversation as resolved.
Show resolved Hide resolved
`fast-json-stringify` works by concatenating lots of string pieces into the
RafaelGSS marked this conversation as resolved.
Show resolved Hide resolved
final JSON string. With this option set, **large arrays** would be stringified
by joining their elements' stringified versions using `Array.join`, instead
of string concatenation

##### Benchmarks

For reference, here are some benchmarks comparing the three
mechanisms. Benchmarks conducted on an old machine.

- Machine: `ST1000LM024 HN-M 1TB HDD, Intel Core i7-3610QM @ 2.3GHz, 12GB RAM, 4C/8T`.
- Node.js `v16.13.1`

```
JSON.stringify large array x 157 ops/sec ±0.73% (86 runs sampled)
fast-json-stringify large array default x 48.72 ops/sec ±4.92% (48 runs sampled)
fast-json-stringify large array json-stringify x 157 ops/sec ±0.76% (86 runs sampled)
fast-json-stringify large array array-join x 69.04 ops/sec ±4.47% (53 runs sampled)
compile-json-stringify large array x 175 ops/sec ±4.47% (79 runs sampled)
mcollina marked this conversation as resolved.
Show resolved Hide resolved
AJV Serialize large array x 58.76 ops/sec ±4.59% (60 runs sampled)
```

<a name="security"></a>
## Security notice

Expand Down
87 changes: 80 additions & 7 deletions bench.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
const benchmark = require('benchmark')
const suite = new benchmark.Suite()

const STR_LEN = 1e4
const LARGE_ARRAY_SIZE = 2e4
const MULTI_ARRAY_LENGHT = 1e3

const schema = {
title: 'Example Schema',
type: 'object',
Expand Down Expand Up @@ -89,7 +93,8 @@ const obj = {

const date = new Date()

const multiArray = []
const multiArray = new Array(MULTI_ARRAY_LENGHT)
const largeArray = new Array(LARGE_ARRAY_SIZE)

const CJS = require('compile-json-stringify')
const CJSStringify = CJS(schemaCJS)
Expand All @@ -99,7 +104,13 @@ const CJSStringifyString = CJS({ type: 'string' })

const FJS = require('.')
const stringify = FJS(schema)
const stringifyArray = FJS(arraySchema)
const stringifyArrayDefault = FJS(arraySchema)
const stringifyArrayJSONStringify = FJS(arraySchema, {
largeArrayMechanism: 'json-stringify'
})
const stringifyArrayArrayJoin = FJS(arraySchema, {
largeArrayMechanism: 'array-join'
})
const stringifyDate = FJS(dateFormatSchema)
const stringifyString = FJS({ type: 'string' })
let str = ''
Expand All @@ -110,18 +121,48 @@ const ajvSerialize = ajv.compileSerializer(schemaAJVJTD)
const ajvSerializeArray = ajv.compileSerializer(arraySchemaAJVJTD)
const ajvSerializeString = ajv.compileSerializer({ type: 'string' })

/**
 * Builds a random word of lowercase ASCII letters with the first letter
 * capitalized (e.g. `Qwhzkepa`). Used to fill the benchmark fixtures.
 *
 * @param {number} length - Number of characters to generate; must be an integer.
 * @returns {string} A string of `length` letters, or `''` when `length <= 0`.
 * @throws {Error} When `length` is not an integer.
 */
const getRandomString = (length) => {
  if (!Number.isInteger(length)) {
    throw new Error('Expected integer length')
  }

  // With no characters generated there is no first letter to capitalize;
  // indexing the empty result would throw a TypeError.
  if (length <= 0) {
    return ''
  }

  const validCharacters = 'abcdefghijklmnopqrstuvwxyz'

  let result = ''
  for (let i = 0; i < length; ++i) {
    // Derive the alphabet size from the string so the two cannot drift apart.
    result += validCharacters[Math.floor(Math.random() * validCharacters.length)]
  }

  return result[0].toUpperCase() + result.slice(1)
}

// eslint-disable-next-line
for (var i = 0; i < 10000; i++) {
for (let i = 0; i < STR_LEN; i++) {
largeArray[i] = {
firstName: getRandomString(8),
lastName: getRandomString(6),
age: Math.ceil(Math.random() * 99)
}

str += i
if (i % 100 === 0) {
str += '"'
}
}

for (let i = STR_LEN; i < LARGE_ARRAY_SIZE; ++i) {
largeArray[i] = {
firstName: getRandomString(10),
lastName: getRandomString(4),
age: Math.ceil(Math.random() * 99)
}
}

Number(str)

for (i = 0; i < 1000; i++) {
multiArray.push(obj)
for (let i = 0; i < MULTI_ARRAY_LENGHT; i++) {
multiArray[i] = obj
}

suite.add('FJS creation', function () {
Expand All @@ -138,8 +179,16 @@ suite.add('JSON.stringify array', function () {
JSON.stringify(multiArray)
})

suite.add('fast-json-stringify array', function () {
stringifyArray(multiArray)
suite.add('fast-json-stringify array default', function () {
stringifyArrayDefault(multiArray)
})

suite.add('fast-json-stringify array json-stringify', function () {
stringifyArrayJSONStringify(multiArray)
})

suite.add('fast-json-stringify array array-join', function () {
stringifyArrayArrayJoin(multiArray)
})

suite.add('compile-json-stringify array', function () {
Expand All @@ -150,6 +199,30 @@ suite.add('AJV Serialize array', function () {
ajvSerializeArray(multiArray)
})

// Large-array benchmarks: every serializer is run against the same
// `largeArray` fixture so the results are directly comparable. The three
// fast-json-stringify entries differ only in the `largeArrayMechanism` option
// their serializer was built with.
suite.add('JSON.stringify large array', function () {
JSON.stringify(largeArray)
})

suite.add('fast-json-stringify large array default', function () {
stringifyArrayDefault(largeArray)
})

suite.add('fast-json-stringify large array json-stringify', function () {
stringifyArrayJSONStringify(largeArray)
})

suite.add('fast-json-stringify large array array-join', function () {
stringifyArrayArrayJoin(largeArray)
})

suite.add('compile-json-stringify large array', function () {
CJSStringifyArray(largeArray)
})

suite.add('AJV Serialize large array', function () {
ajvSerializeArray(largeArray)
})

suite.add('JSON.stringify long string', function () {
JSON.stringify(str)
})
Expand Down
55 changes: 53 additions & 2 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,14 @@ const fjsCloned = Symbol('fast-json-stringify.cloned')
const { randomUUID } = require('crypto')

const validate = require('./schema-validator')

let stringSimilarity = null
let largeArrayMechanism = 'default'
const validLargeArrayMechanisms = [
'default',
'json-stringify',
'array-join'
]

const addComma = `
if (addComma) {
Expand Down Expand Up @@ -73,6 +80,14 @@ function build (schema, options) {
}
}

// Select how arrays of 20000+ items are serialized. The chosen value lives in
// the module-level `largeArrayMechanism`, which the array code generator reads.
if (options.largeArrayMechanism) {
  if (validLargeArrayMechanisms.includes(options.largeArrayMechanism)) {
    largeArrayMechanism = options.largeArrayMechanism
  } else {
    // Report the offending option itself (previously printed `options.rounding`).
    throw new Error(`Unsupported large array mechanism ${options.largeArrayMechanism}`)
  }
} else {
  // The variable is shared across build() calls; reset it so a mechanism
  // selected by an earlier call does not leak into a call that omits the option.
  largeArrayMechanism = 'default'
}

/* eslint no-new-func: "off" */
let code = `
'use strict'
Expand Down Expand Up @@ -1028,7 +1043,9 @@ function buildArray (location, code, name, key = null) {
}

code += `
var l = obj.length
var l = obj.length`

const concatSnippet = `
var jsonOutput= ''
for (var i = 0; i < l; i++) {
var json = ''
Expand All @@ -1040,7 +1057,32 @@ function buildArray (location, code, name, key = null) {
}
}
return \`[\${jsonOutput}]\`
}`

// Optionally emit a fast path ahead of the regular concatenation loop.
// In the generated code `l` is `obj.length`; the fast paths only trigger for
// arrays with at least 20000 items, smaller arrays fall through to the loop.
switch (largeArrayMechanism) {
// No fast path: every array goes through the concatenation loop.
case 'default':
break

// Large arrays are handed to JSON.stringify as-is, bypassing the
// schema-driven serialization of their items.
case 'json-stringify':
code += `
if (l && l >= 20000) {
return JSON.stringify(obj)
}`
break

// Large arrays are serialized item by item with `result.mapFnName` and glued
// together with Array.prototype.join instead of repeated string concatenation.
// NOTE(review): `result.mapFnName` is presumably the serializer name returned
// by nested() for the item schema - confirm it is set for every item type.
case 'array-join':
code += `
if (l && l >= 20000) {
return \`[\${obj.map(${result.mapFnName}).join(',')}]\`
}`
break

// Defensive guard against a mechanism name none of the cases above handle.
default:
throw new Error(`Unsupported large array mechanism ${largeArrayMechanism}`)
}

code += `
${concatSnippet}
${result.laterCode}
`

Expand Down Expand Up @@ -1148,22 +1190,27 @@ function nested (laterCode, name, key, location, subKey, isArray) {

switch (type) {
case 'null':
funcName = '$asNull'
code += `
json += $asNull()
`
break
case 'string': {
funcName = '$asString'
const stringSerializer = getStringSerializer(schema.format)
code += nullable ? `json += obj${accessor} === null ? null : ${stringSerializer}(obj${accessor})` : `json += ${stringSerializer}(obj${accessor})`
break
}
case 'integer':
funcName = '$asInteger'
code += nullable ? `json += obj${accessor} === null ? null : $asInteger(obj${accessor})` : `json += $asInteger(obj${accessor})`
break
case 'number':
funcName = '$asNumber'
code += nullable ? `json += obj${accessor} === null ? null : $asNumber(obj${accessor})` : `json += $asNumber(obj${accessor})`
break
case 'boolean':
funcName = '$asBoolean'
code += nullable ? `json += obj${accessor} === null ? null : $asBoolean(obj${accessor})` : `json += $asBoolean(obj${accessor})`
break
case 'object':
Expand All @@ -1181,6 +1228,7 @@ function nested (laterCode, name, key, location, subKey, isArray) {
`
break
case undefined:
funcName = '$asNull'
if ('anyOf' in schema) {
// beware: dereferenceOfRefs has side effects and changes schema.anyOf
const anyOfLocations = dereferenceOfRefs(location, 'anyOf')
Expand Down Expand Up @@ -1319,7 +1367,8 @@ function nested (laterCode, name, key, location, subKey, isArray) {

return {
code,
laterCode
laterCode,
mapFnName: funcName
}
}

Expand All @@ -1335,6 +1384,8 @@ function isEmpty (schema) {

module.exports = build

module.exports.validLargeArrayMechanisms = validLargeArrayMechanisms

module.exports.restore = function ({ code, ajv }) {
// eslint-disable-next-line
return (Function.apply(null, ['ajv', code])
Expand Down