From 0d4d16ab30589eeeaff3524331b9e761e015f8b2 Mon Sep 17 00:00:00 2001 From: Francis Gulotta Date: Tue, 2 Oct 2018 22:15:55 -0400 Subject: [PATCH] indexOf that conforms to nodejs 10 tests This is a JS implementation of indexOf for searching for BufferLists, Buffers, strings and numbers inside a BufferList. It passes the node 10 buffer indexof test suite and works on node 4+. There are quite of a few performance improvements possible, single byte search values can use the native Buffer.indexOf command. Multibyte buffer search values could too with some extra work. I don't believe searching for another buffer list can easily be made faster. This takes largely from two places and brings them together - The nodejs buffer index of tests https://github.com/nodejs/node/blob/5d8373a498a50b1387464391402ef22636439303/test/parallel/test-buffer-indexof.js - from @soldair's node-buffer-indexof https://www.npmjs.com/package/buffer-indexof --- bl.js | 65 +++++++- test/test.js | 450 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 513 insertions(+), 2 deletions(-) diff --git a/bl.js b/bl.js index 2e520b4..12b1618 100644 --- a/bl.js +++ b/bl.js @@ -1,3 +1,4 @@ +'use strict' var DuplexStream = require('readable-stream').Duplex , util = require('util') , Buffer = require('safe-buffer').Buffer @@ -54,7 +55,7 @@ BufferList.prototype.append = function append (buf) { var i = 0 if (Buffer.isBuffer(buf)) { - this._appendBuffer(buf); + this._appendBuffer(buf) } else if (Array.isArray(buf)) { for (; i < buf.length; i++) this.append(buf[i]) @@ -68,7 +69,7 @@ BufferList.prototype.append = function append (buf) { if (typeof buf == 'number') buf = buf.toString() - this._appendBuffer(Buffer.from(buf)); + this._appendBuffer(Buffer.from(buf)) } return this @@ -251,6 +252,66 @@ BufferList.prototype.destroy = function destroy () { this.push(null) } +BufferList.prototype.indexOf = function (search, offset, encoding) { + if (encoding === undefined && typeof offset === 'string') { + encoding = offset + offset = undefined + } + if (typeof search === 'function' || Array.isArray(search)) { + throw new TypeError('The "value" argument must be one of type string, Buffer, BufferList, or Uint8Array.') + } else if (typeof search === 'number') { + search = Buffer.from([search]) + } else if (typeof search === 'string') { + search = Buffer.from(search, encoding) + } else if (!search instanceof BufferList && !Buffer.isBuffer(search)) { + search = Buffer.from(search) + } + search = new BufferList(search) + + offset = Number(offset || 0) + if (isNaN(offset)) { + offset = 0 + } + + if (offset < 0) { + offset = this.length + offset + } + + if (offset < 0) { + offset = 0 + } + + if (search.length === 0) { + return offset > this.length ? this.length : offset + } + + let searchOffset = 0 + let searchPosition = -1 + + for (let blSearchOffset = offset; blSearchOffset < this.length ; ++blSearchOffset) { + if(this.get(blSearchOffset) != search.get(searchOffset)){ + searchPosition = -1 + blSearchOffset -= searchOffset-1 + searchOffset = 0 + } + + if(this.get(blSearchOffset) == search.get(searchOffset)) { + if(searchPosition == -1) { + searchPosition = blSearchOffset + } + ++searchOffset + if(searchOffset == search.length) { + break + } + } + } + + if (searchPosition > -1 && this.length - searchPosition < search.length) { + return -1 + } + return searchPosition +} + ;(function () { var methods = { diff --git a/test/test.js b/test/test.js index 6f427e4..bbc40a2 100644 --- a/test/test.js +++ b/test/test.js @@ -1,3 +1,5 @@ +'use strict' + var tape = require('tape') , crypto = require('crypto') , fs = require('fs') @@ -425,6 +427,454 @@ tape('test toString encoding', function (t) { t.end() }) +tape('indexOf single byte needle', t => { + const bl = new BufferList(['abcdefg', 'abcdefg']) + t.equal(bl.indexOf('e'), 4) + t.equal(bl.indexOf('e', 5), 11) + t.end() +}) + +tape('indexOf multiple byte needle', t => { + const bl = new BufferList(['abcdefg', 'abcdefg']) + t.equal(bl.indexOf('ef'), 4) + t.equal(bl.indexOf('ef', 5), 11) + t.end() +}) + +tape('indexOf multiple byte needles across buffer boundaries', t => { + const bl = new BufferList(['abcdefg', 'abcdefg']) + t.equal(bl.indexOf('fgabc'), 5) + t.end() +}) + +tape('indexOf takes a buffer list search', t => { + const bl = new BufferList(['abcdefg', 'abcdefg']) + const search = new BufferList('fgabc') + t.equal(bl.indexOf(search), 5) + t.end() +}) + +tape('indexOf a zero byte needle', t => { + const b = new BufferList('abcdef') + const buf_empty = Buffer.from('') + t.equal(b.indexOf(''), 0) + t.equal(b.indexOf('', 1), 1) + t.equal(b.indexOf('', b.length + 1), b.length) + t.equal(b.indexOf('', Infinity), b.length) + t.equal(b.indexOf(buf_empty), 0) + t.equal(b.indexOf(buf_empty, 1), 1) + t.equal(b.indexOf(buf_empty, b.length + 1), b.length) + t.equal(b.indexOf(buf_empty, Infinity), b.length) + t.end() +}) + +// only present in node 6+ +;(process.version.substr(1).split('.')[0] >= 6) && tape('indexOf latin1 and binary encoding', t => { + const b = new BufferList('abcdef') + + // test latin1 encoding + t.equal( + new BufferList(Buffer.from(b.toString('latin1'), 'latin1')) + .indexOf('d', 0, 'latin1'), + 3 + ) + t.equal( + new BufferList(Buffer.from(b.toString('latin1'), 'latin1')) + .indexOf(Buffer.from('d', 'latin1'), 0, 'latin1'), + 3 + ) + t.equal( + new BufferList(Buffer.from('aa\u00e8aa', 'latin1')) + .indexOf('\u00e8', 'latin1'), + 2 + ) + t.equal( + new BufferList(Buffer.from('\u00e8', 'latin1')) + .indexOf('\u00e8', 'latin1'), + 0 + ) + t.equal( + new BufferList(Buffer.from('\u00e8', 'latin1')) + .indexOf(Buffer.from('\u00e8', 'latin1'), 'latin1'), + 0 + ) + + // test binary encoding + t.equal( + new BufferList(Buffer.from(b.toString('binary'), 'binary')) + .indexOf('d', 0, 'binary'), + 3 + ) + t.equal( + new BufferList(Buffer.from(b.toString('binary'), 'binary')) + .indexOf(Buffer.from('d', 'binary'), 0, 'binary'), + 3 + ) + t.equal( + new BufferList(Buffer.from('aa\u00e8aa', 'binary')) + .indexOf('\u00e8', 'binary'), + 2 + ) + t.equal( + new BufferList(Buffer.from('\u00e8', 'binary')) + .indexOf('\u00e8', 'binary'), + 0 + ) + t.equal( + new BufferList(Buffer.from('\u00e8', 'binary')) + .indexOf(Buffer.from('\u00e8', 'binary'), 'binary'), + 0 + ) + t.end() +}) + +tape('indexOf the entire nodejs10 buffer test suite', t => { + const b = new BufferList('abcdef') + const buf_a = Buffer.from('a') + const buf_bc = Buffer.from('bc') + const buf_f = Buffer.from('f') + const buf_z = Buffer.from('z') + + const stringComparison = 'abcdef' + + t.equal(b.indexOf('a'), 0) + t.equal(b.indexOf('a', 1), -1) + t.equal(b.indexOf('a', -1), -1) + t.equal(b.indexOf('a', -4), -1) + t.equal(b.indexOf('a', -b.length), 0) + t.equal(b.indexOf('a', NaN), 0) + t.equal(b.indexOf('a', -Infinity), 0) + t.equal(b.indexOf('a', Infinity), -1) + t.equal(b.indexOf('bc'), 1) + t.equal(b.indexOf('bc', 2), -1) + t.equal(b.indexOf('bc', -1), -1) + t.equal(b.indexOf('bc', -3), -1) + t.equal(b.indexOf('bc', -5), 1) + t.equal(b.indexOf('bc', NaN), 1) + t.equal(b.indexOf('bc', -Infinity), 1) + t.equal(b.indexOf('bc', Infinity), -1) + t.equal(b.indexOf('f'), b.length - 1) + t.equal(b.indexOf('z'), -1) + // empty search tests + t.equal(b.indexOf(buf_a), 0) + t.equal(b.indexOf(buf_a, 1), -1) + t.equal(b.indexOf(buf_a, -1), -1) + t.equal(b.indexOf(buf_a, -4), -1) + t.equal(b.indexOf(buf_a, -b.length), 0) + t.equal(b.indexOf(buf_a, NaN), 0) + t.equal(b.indexOf(buf_a, -Infinity), 0) + t.equal(b.indexOf(buf_a, Infinity), -1) + t.equal(b.indexOf(buf_bc), 1) + t.equal(b.indexOf(buf_bc, 2), -1) + t.equal(b.indexOf(buf_bc, -1), -1) + t.equal(b.indexOf(buf_bc, -3), -1) + t.equal(b.indexOf(buf_bc, -5), 1) + t.equal(b.indexOf(buf_bc, NaN), 1) + t.equal(b.indexOf(buf_bc, -Infinity), 1) + t.equal(b.indexOf(buf_bc, Infinity), -1) + t.equal(b.indexOf(buf_f), b.length - 1) + t.equal(b.indexOf(buf_z), -1) + t.equal(b.indexOf(0x61), 0) + t.equal(b.indexOf(0x61, 1), -1) + t.equal(b.indexOf(0x61, -1), -1) + t.equal(b.indexOf(0x61, -4), -1) + t.equal(b.indexOf(0x61, -b.length), 0) + t.equal(b.indexOf(0x61, NaN), 0) + t.equal(b.indexOf(0x61, -Infinity), 0) + t.equal(b.indexOf(0x61, Infinity), -1) + t.equal(b.indexOf(0x0), -1) + + // test offsets + t.equal(b.indexOf('d', 2), 3) + t.equal(b.indexOf('f', 5), 5) + t.equal(b.indexOf('f', -1), 5) + t.equal(b.indexOf('f', 6), -1) + + t.equal(b.indexOf(Buffer.from('d'), 2), 3) + t.equal(b.indexOf(Buffer.from('f'), 5), 5) + t.equal(b.indexOf(Buffer.from('f'), -1), 5) + t.equal(b.indexOf(Buffer.from('f'), 6), -1) + + t.equal(Buffer.from('ff').indexOf(Buffer.from('f'), 1, 'ucs2'), -1) + + // test invalid and uppercase encoding + t.equal(b.indexOf('b', 'utf8'), 1) + t.equal(b.indexOf('b', 'UTF8'), 1) + t.equal(b.indexOf('62', 'HEX'), 1) + t.throws(() => b.indexOf('bad', 'enc'), TypeError) + + // test hex encoding + t.equal( + Buffer.from(b.toString('hex'), 'hex') + .indexOf('64', 0, 'hex'), + 3 + ) + t.equal( + Buffer.from(b.toString('hex'), 'hex') + .indexOf(Buffer.from('64', 'hex'), 0, 'hex'), + 3 + ) + + // test base64 encoding + t.equal( + Buffer.from(b.toString('base64'), 'base64') + .indexOf('ZA==', 0, 'base64'), + 3 + ) + t.equal( + Buffer.from(b.toString('base64'), 'base64') + .indexOf(Buffer.from('ZA==', 'base64'), 0, 'base64'), + 3 + ) + + // test ascii encoding + t.equal( + Buffer.from(b.toString('ascii'), 'ascii') + .indexOf('d', 0, 'ascii'), + 3 + ) + t.equal( + Buffer.from(b.toString('ascii'), 'ascii') + .indexOf(Buffer.from('d', 'ascii'), 0, 'ascii'), + 3 + ) + + // test optional offset with passed encoding + t.equal(Buffer.from('aaaa0').indexOf('30', 'hex'), 4) + t.equal(Buffer.from('aaaa00a').indexOf('3030', 'hex'), 4) + + { + // test usc2 encoding + const twoByteString = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2') + + t.equal(8, twoByteString.indexOf('\u0395', 4, 'ucs2')) + t.equal(6, twoByteString.indexOf('\u03a3', -4, 'ucs2')) + t.equal(4, twoByteString.indexOf('\u03a3', -6, 'ucs2')) + t.equal(4, twoByteString.indexOf( + Buffer.from('\u03a3', 'ucs2'), -6, 'ucs2')) + t.equal(-1, twoByteString.indexOf('\u03a3', -2, 'ucs2')) + } + + const mixedByteStringUcs2 = + Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395', 'ucs2') + t.equal(6, mixedByteStringUcs2.indexOf('bc', 0, 'ucs2')) + t.equal(10, mixedByteStringUcs2.indexOf('\u03a3', 0, 'ucs2')) + t.equal(-1, mixedByteStringUcs2.indexOf('\u0396', 0, 'ucs2')) + + t.equal( + 6, mixedByteStringUcs2.indexOf(Buffer.from('bc', 'ucs2'), 0, 'ucs2')) + t.equal( + 10, mixedByteStringUcs2.indexOf(Buffer.from('\u03a3', 'ucs2'), 0, 'ucs2')) + t.equal( + -1, mixedByteStringUcs2.indexOf(Buffer.from('\u0396', 'ucs2'), 0, 'ucs2')) + + { + const twoByteString = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2') + + // Test single char pattern + t.equal(0, twoByteString.indexOf('\u039a', 0, 'ucs2')) + let index = twoByteString.indexOf('\u0391', 0, 'ucs2') + t.equal(2, index, `Alpha - at index ${index}`) + index = twoByteString.indexOf('\u03a3', 0, 'ucs2') + t.equal(4, index, `First Sigma - at index ${index}`) + index = twoByteString.indexOf('\u03a3', 6, 'ucs2') + t.equal(6, index, `Second Sigma - at index ${index}`) + index = twoByteString.indexOf('\u0395', 0, 'ucs2') + t.equal(8, index, `Epsilon - at index ${index}`) + index = twoByteString.indexOf('\u0392', 0, 'ucs2') + t.equal(-1, index, `Not beta - at index ${index}`) + + // Test multi-char pattern + index = twoByteString.indexOf('\u039a\u0391', 0, 'ucs2') + t.equal(0, index, `Lambda Alpha - at index ${index}`) + index = twoByteString.indexOf('\u0391\u03a3', 0, 'ucs2') + t.equal(2, index, `Alpha Sigma - at index ${index}`) + index = twoByteString.indexOf('\u03a3\u03a3', 0, 'ucs2') + t.equal(4, index, `Sigma Sigma - at index ${index}`) + index = twoByteString.indexOf('\u03a3\u0395', 0, 'ucs2') + t.equal(6, index, `Sigma Epsilon - at index ${index}`) + } + + const mixedByteStringUtf8 = Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395') + t.equal(5, mixedByteStringUtf8.indexOf('bc')) + t.equal(5, mixedByteStringUtf8.indexOf('bc', 5)) + t.equal(5, mixedByteStringUtf8.indexOf('bc', -8)) + t.equal(7, mixedByteStringUtf8.indexOf('\u03a3')) + t.equal(-1, mixedByteStringUtf8.indexOf('\u0396')) + + + // Test complex string indexOf algorithms. Only trigger for long strings. + // Long string that isn't a simple repeat of a shorter string. + let longString = 'A' + for (let i = 66; i < 76; i++) { // from 'B' to 'K' + longString = longString + String.fromCharCode(i) + longString + } + + const longBufferString = Buffer.from(longString) + + // pattern of 15 chars, repeated every 16 chars in long + let pattern = 'ABACABADABACABA' + for (let i = 0; i < longBufferString.length - pattern.length; i += 7) { + const index = longBufferString.indexOf(pattern, i) + t.equal((i + 15) & ~0xf, index, + `Long ABACABA...-string at index ${i}`) + } + + let index = longBufferString.indexOf('AJABACA') + t.equal(510, index, `Long AJABACA, First J - at index ${index}`) + index = longBufferString.indexOf('AJABACA', 511) + t.equal(1534, index, `Long AJABACA, Second J - at index ${index}`) + + pattern = 'JABACABADABACABA' + index = longBufferString.indexOf(pattern) + t.equal(511, index, `Long JABACABA..., First J - at index ${index}`) + index = longBufferString.indexOf(pattern, 512) + t.equal( + 1535, index, `Long JABACABA..., Second J - at index ${index}`) + + // Search for a non-ASCII string in a pure ASCII string. + const asciiString = Buffer.from( + 'arglebargleglopglyfarglebargleglopglyfarglebargleglopglyf') + t.equal(-1, asciiString.indexOf('\x2061')) + t.equal(3, asciiString.indexOf('leb', 0)) + + // Search in string containing many non-ASCII chars. + const allCodePoints = [] + for (let i = 0; i < 65536; i++) allCodePoints[i] = i + const allCharsString = String.fromCharCode.apply(String, allCodePoints) + const allCharsBufferUtf8 = Buffer.from(allCharsString) + const allCharsBufferUcs2 = Buffer.from(allCharsString, 'ucs2') + + // Search for string long enough to trigger complex search with ASCII pattern + // and UC16 subject. + t.equal(-1, allCharsBufferUtf8.indexOf('notfound')) + t.equal(-1, allCharsBufferUcs2.indexOf('notfound')) + + // Needle is longer than haystack, but only because it's encoded as UTF-16 + t.equal(Buffer.from('aaaa').indexOf('a'.repeat(4), 'ucs2'), -1) + + t.equal(Buffer.from('aaaa').indexOf('a'.repeat(4), 'utf8'), 0) + t.equal(Buffer.from('aaaa').indexOf('你好', 'ucs2'), -1) + + // Haystack has odd length, but the needle is UCS2. + t.equal(Buffer.from('aaaaa').indexOf('b', 'ucs2'), -1) + + { + // Find substrings in Utf8. + const lengths = [1, 3, 15]; // Single char, simple and complex. + const indices = [0x5, 0x60, 0x400, 0x680, 0x7ee, 0xFF02, 0x16610, 0x2f77b] + for (let lengthIndex = 0; lengthIndex < lengths.length; lengthIndex++) { + for (let i = 0; i < indices.length; i++) { + const index = indices[i] + let length = lengths[lengthIndex] + + if (index + length > 0x7F) { + length = 2 * length + } + + if (index + length > 0x7FF) { + length = 3 * length + } + + if (index + length > 0xFFFF) { + length = 4 * length + } + + const patternBufferUtf8 = allCharsBufferUtf8.slice(index, index + length) + t.equal(index, allCharsBufferUtf8.indexOf(patternBufferUtf8)) + + const patternStringUtf8 = patternBufferUtf8.toString() + t.equal(index, allCharsBufferUtf8.indexOf(patternStringUtf8)) + } + } + } + + { + // Find substrings in Usc2. + const lengths = [2, 4, 16]; // Single char, simple and complex. + const indices = [0x5, 0x65, 0x105, 0x205, 0x285, 0x2005, 0x2085, 0xfff0] + for (let lengthIndex = 0; lengthIndex < lengths.length; lengthIndex++) { + for (let i = 0; i < indices.length; i++) { + const index = indices[i] * 2 + const length = lengths[lengthIndex] + + const patternBufferUcs2 = + allCharsBufferUcs2.slice(index, index + length) + t.equal( + index, allCharsBufferUcs2.indexOf(patternBufferUcs2, 0, 'ucs2')) + + const patternStringUcs2 = patternBufferUcs2.toString('ucs2') + t.equal( + index, allCharsBufferUcs2.indexOf(patternStringUcs2, 0, 'ucs2')) + } + } + } + + [ + () => {}, + {}, + [] + ].forEach(val => { + debugger + t.throws(() => b.indexOf(val), TypeError, `"${JSON.stringify(val)}" should throw`) + }) + + // Test weird offset arguments. + // The following offsets coerce to NaN or 0, searching the whole Buffer + t.equal(b.indexOf('b', undefined), 1) + t.equal(b.indexOf('b', {}), 1) + t.equal(b.indexOf('b', 0), 1) + t.equal(b.indexOf('b', null), 1) + t.equal(b.indexOf('b', []), 1) + + // The following offset coerces to 2, in other words +[2] === 2 + t.equal(b.indexOf('b', [2]), -1) + + // Behavior should match String.indexOf() + t.equal( + b.indexOf('b', undefined), + stringComparison.indexOf('b', undefined)) + t.equal( + b.indexOf('b', {}), + stringComparison.indexOf('b', {})) + t.equal( + b.indexOf('b', 0), + stringComparison.indexOf('b', 0)) + t.equal( + b.indexOf('b', null), + stringComparison.indexOf('b', null)) + t.equal( + b.indexOf('b', []), + stringComparison.indexOf('b', [])) + t.equal( + b.indexOf('b', [2]), + stringComparison.indexOf('b', [2])) + + // test truncation of Number arguments to uint8 + { + const buf = Buffer.from('this is a test') + t.equal(buf.indexOf(0x6973), 3) + t.equal(buf.indexOf(0x697320), 4) + t.equal(buf.indexOf(0x69732069), 2) + t.equal(buf.indexOf(0x697374657374), 0) + t.equal(buf.indexOf(0x69737374), 0) + t.equal(buf.indexOf(0x69737465), 11) + t.equal(buf.indexOf(0x69737465), 11) + t.equal(buf.indexOf(-140), 0) + t.equal(buf.indexOf(-152), 1) + t.equal(buf.indexOf(0xff), -1) + t.equal(buf.indexOf(0xffff), -1) + } + + // Test that Uint8Array arguments are okay. + { + const needle = new Uint8Array([ 0x66, 0x6f, 0x6f ]) + const haystack = new BufferList(Buffer.from('a foo b foo')) + t.equal(haystack.indexOf(needle), 2) + } + t.end() +}) + !process.browser && tape('test stream', function (t) { var random = crypto.randomBytes(65534) , rndhash = hash(random, 'md5')