[PERF] tokenizer: avoid regexp match
When matching a string against a regexp, some memory is allocated for the match result; scanning characters against a Set avoids that allocation.
                tokenize         tokenizeSymbol     Model
Before   time   422ms            203ms              3382ms
         mem    215MB            127MB

After    time   407ms (-3.4%)    188ms (-7.2%)      3335ms (-1.38%)
         mem    197MB (-8.3%)    106MB (-16.5%)

(averages over 5 runs)
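
For illustration only, here is a minimal standalone TypeScript sketch of the two approaches. It is not the tokenizer's actual code: it works on a plain string rather than the TokenizingChars stream, and the helper names readSymbolWithRegexp / readSymbolWithSet are made up for the example.

    // Sketch only: contrasts the regexp-based extraction with the Set-based scan.
    const separatorRegexp = /^[\w\.!\$]+/;
    const SYMBOL_CHARS = new Set(
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.!$"
    );

    // Before: String.prototype.match allocates a match array on every call.
    function readSymbolWithRegexp(text: string, start: number): string {
      const match = text.slice(start).match(separatorRegexp);
      return match ? match[0] : "";
    }

    // After: checking each character against a Set allocates nothing per character.
    function readSymbolWithSet(text: string, start: number): string {
      let end = start;
      while (end < text.length && SYMBOL_CHARS.has(text[end])) {
        end++;
      }
      return text.slice(start, end);
    }

    // Both return the same token text:
    // readSymbolWithRegexp("SUM(A1)", 0) === "SUM"
    // readSymbolWithSet("SUM(A1)", 0) === "SUM"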

closes #4191

Task: 3922347
Signed-off-by: Pierre Rousseau (pro) <pro@odoo.com>
LucasLefevre committed May 13, 2024
1 parent 998bea0 commit dceb0c4
Showing 1 changed file with 3 additions and 6 deletions.
9 changes: 3 additions & 6 deletions src/formulas/tokenizer.ts
@@ -143,7 +143,7 @@ function tokenizeString(chars: TokenizingChars): Token | null {
   return null;
 }
 
-const separatorRegexp = /^[\w\.!\$]+/;
+const SYMBOL_CHARS = new Set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.!$");
 
 /**
  * A "Symbol" is just basically any word-like element that can appear in a
@@ -184,11 +184,8 @@ function tokenizeSymbol(chars: TokenizingChars): Token | null {
       };
     }
   }
-  const match = chars.remaining().match(separatorRegexp);
-  if (match) {
-    const value = match[0];
-    result += value;
-    chars.advanceBy(value.length);
+  while (chars.current && SYMBOL_CHARS.has(chars.current)) {
+    result += chars.shift();
   }
   if (result.length) {
     const value = result;
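
The chars.current and chars.shift() calls in the new loop come from the tokenizer's TokenizingChars helper. A minimal cursor with the same surface, sketched here as an assumption about the real class (only the members used in the diff are modelled), could look like:

    // Minimal sketch of a TokenizingChars-like cursor; the real class may differ.
    class CharCursor {
      private index = 0;
      constructor(private readonly text: string) {}

      // Character under the cursor, or undefined at the end of input.
      get current(): string | undefined {
        return this.text[this.index];
      }

      // Returns the current character and moves the cursor forward by one.
      shift(): string {
        return this.text[this.index++];
      }

      // The part of the input that has not been consumed yet.
      remaining(): string {
        return this.text.slice(this.index);
      }

      // Skips n characters (used by the old regexp-based code).
      advanceBy(n: number): void {
        this.index += n;
      }
    }

With such a cursor, the new loop consumes one symbol character at a time and never materialises the remaining input, whereas the old code allocated both the remaining() string and a match array for every symbol.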
