Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speedup String#{ indexOf, lastIndexOf } #2649

Draft
wants to merge 22 commits into
base: main
Choose a base branch
from
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 29 additions & 4 deletions std/assembly/string.ts
@@ -1,7 +1,18 @@
/// <reference path="./rt/index.d.ts" />

import { OBJECT, BLOCK_MAXSIZE, TOTAL_OVERHEAD } from "./rt/common";
import { compareImpl, strtol, strtod, isSpace, isAscii, isFinalSigma, toLower8, toUpper8 } from "./util/string";
import {
compareImpl,
findCodePointForward,
findCodePointBackward,
strtol,
strtod,
isSpace,
isAscii,
isFinalSigma,
toLower8,
toUpper8
} from "./util/string";
import { SPECIALS_UPPER, casemap, bsearch } from "./util/casemap";
import { E_INDEXOUTOFRANGE, E_INVALIDLENGTH, E_UNPAIRED_SURROGATE } from "./util/error";
import { idof } from "./builtins";
Expand Down Expand Up @@ -165,6 +176,13 @@ import { Array } from "./array";
let len = <isize>this.length;
if (!len) return -1;
let searchStart = min(max(<isize>start, 0), len);
if (len - searchStart < searchLen) return -1;
if (ASC_SHRINK_LEVEL <= 2) {
let firstChar = load<u16>(changetype<usize>(search));
searchStart = findCodePointForward(changetype<usize>(this), searchStart, len, firstChar);
if (searchStart == -1) return -1; // Nothing found
if (searchLen == 1) return <i32>searchStart; // Needle is single character
}
for (len -= searchLen; searchStart <= len; ++searchStart) {
// @ts-ignore: string <-> String
if (!compareImpl(this, searchStart, search, 0, searchLen)) return <i32>searchStart;
Expand All @@ -175,9 +193,16 @@ import { Array } from "./array";
lastIndexOf(search: String, start: i32 = i32.MAX_VALUE): i32 {
let searchLen = <isize>search.length;
if (!searchLen) return this.length;
let len = this.length;
let len = <isize>this.length;
if (!len) return -1;
let searchStart = min(max(<isize>start, 0), <isize>len - searchLen);
if (len < searchLen) return -1;
let searchStart = min(max(<isize>start, 0), len - searchLen);
if (ASC_SHRINK_LEVEL <= 2) {
let firstChar = load<u16>(changetype<usize>(search));
searchStart = findCodePointBackward(changetype<usize>(this), searchStart, firstChar);
if (searchStart == -1) return -1; // Nothing found
if (searchLen == 1) return <i32>searchStart; // Needle is single character
}
for (; searchStart >= 0; --searchStart) {
// @ts-ignore: string <-> String
if (!compareImpl(this, searchStart, search, 0, searchLen)) return <i32>searchStart;
Expand Down Expand Up @@ -843,5 +868,5 @@ export namespace String {
}

export class TemplateStringsArray extends Array<string> {
readonly raw: string[];
readonly raw!: string[];
}
66 changes: 65 additions & 1 deletion std/assembly/util/string.ts
Expand Up @@ -1187,7 +1187,7 @@ function parseExp(ptr: usize, len: i32): i32 {
function fixmul(a: u64, b: u32): u64 {
let low = (a & 0xFFFFFFFF) * b;
let high = (a >> 32) * b + (low >> 32);
let overflow = <u32>(high >> 32);
let overflow = u32(high >> 32);
let space = clz(overflow);
let revspace: u64 = 32 - space;
__fixmulShift += revspace;
Expand All @@ -1200,3 +1200,67 @@ function pow10(n: i32): f64 {
// argument `n` should be within the [0, 22] range
return load<f64>(POWERS10 + (n << alignof<f64>()));
}

/**
 * Builds a "zero lane" mask for a 64-bit word viewed as four 16-bit lanes.
 *
 * The result is non-zero exactly when at least one lane of `value` is zero.
 * Only bit 15 of each lane can be set in the result. The LOWEST set bit always
 * marks a lane that really is zero; bits above it may be set spuriously because
 * the subtraction borrows across lane boundaries, so they must not be trusted.
 */
// @ts-ignore: decorator
@inline
function makeMoveMask(value: u64): u64 {
  // Subtracting 1 from every lane flips the top bit of lanes that were zero.
  let decremented = value - 0x0001_0001_0001_0001;
  // Discard lanes whose top bit was already set before the subtraction.
  let candidates = decremented & ~value;
  // Keep only the per-lane sign bits.
  return candidates & 0x8000_8000_8000_8000;
}

/**
 * Translates a lane mask into the index (0..3) of the 16-bit lane that holds
 * its lowest set bit: the trailing-zero count divided by 16.
 * The call sites visible in this file only pass non-zero masks.
 */
// @ts-ignore: decorator
@inline
function maskToIndex(x: u64): isize {
  let lowestBit = <isize>ctz(x);
  return lowestBit >>> 4;
}

/**
 * Scans 16-bit code units forward, looking for the first one equal to `code`.
 *
 * @param input pointer to the first code unit of the buffer
 * @param start index of the first code unit to examine
 * @param len exclusive end index of the scan
 * @param code code unit to look for
 * @returns index (relative to `input`) of the first match, or -1 if none
 */
export function findCodePointForward(input: usize, start: isize, len: isize, code: u32): isize {
  let cursor = input + (start << 1);
  let remaining = len - start;
  // Broadcast the code unit into all four 16-bit lanes of a 64-bit word
  let pattern = <u64>code * 0x0001_0001_0001_0001;
  // Wide phase: compare four code units per iteration
  for (; remaining >= 4; remaining -= 4, cursor += 8) {
    // XOR zeroes the lanes equal to `code`; the mask flags the zero lanes
    let hits = makeMoveMask(load<u64>(cursor) ^ pattern);
    if (hits) return (cursor - input >>> 1) + maskToIndex(hits);
  }
  // Scalar phase: the 0 to 3 code units that did not fill a whole word
  for (; remaining > 0; remaining -= 1, cursor += 2) {
    if (load<u16>(cursor) == code) return cursor - input >>> 1;
  }
  return -1;
}

/**
 * Scans 16-bit code units backward, looking for the last one at or before
 * `start` that equals `code`.
 *
 * The scan keeps an exclusive `end` cursor (one past the next code unit to
 * examine), so every code unit in [0, start] is examined exactly once: none is
 * skipped when the cursor becomes 8-byte aligned, including when `start` is
 * aligned from the outset.
 *
 * @param input pointer to the first code unit of the buffer
 * @param start index of the first (highest) code unit to examine
 * @param code code unit to look for
 * @returns index (relative to `input`) of the highest match <= `start`, or -1
 */
export function findCodePointBackward(input: usize, start: isize, code: u32): isize {
  if (start < 0) return -1;
  // One past the code unit at `start`
  let end = input + ((<usize>start + 1) << 1);
  // Scalar steps until the 64-bit loads below become 8-byte aligned
  while (end > input && (end & 7) != 0) {
    end -= 2;
    if (load<u16>(end) == code) return <isize>((end - input) >>> 1);
  }
  // Broadcast the code unit into all four 16-bit lanes of a 64-bit word
  let c64 = <u64>code * 0x0001_0001_0001_0001;
  // Wide phase: test four code units per iteration
  while (end - input >= 8) {
    let src = end - 8;
    if (makeMoveMask(load<u64>(src) ^ c64)) {
      // The mask only says that some lane matched: its lowest bit is exact but
      // higher bits can be borrow artifacts. A backward search needs the
      // HIGHEST matching lane, so resolve it with scalar compares from the top.
      do {
        end -= 2;
        if (load<u16>(end) == code) return <isize>((end - input) >>> 1);
      } while (end > src);
    }
    end = src;
  }
  // Scalar phase: the 0 to 3 code units left at the front of the buffer
  while (end > input) {
    end -= 2;
    if (load<u16>(end) == code) return <isize>((end - input) >>> 1);
  }
  return -1;
}