wip removing unnecessary decode operations

vercel · Mar 18, 2024 · a47aa8a · a47aa8a
1 parent 51b878f
commit a47aa8a
Show file tree

Hide file tree

Showing 3 changed files with 104 additions and 31 deletions.
diff --git a/packages/next/src/server/stream-utils/encodedTags.ts b/packages/next/src/server/stream-utils/encodedTags.ts
@@ -0,0 +1,15 @@
+// Pre-encoded byte sequences for HTML tags, so stream transforms can compare
+// against raw `Uint8Array` chunks without decoding them to strings first.
+export const ENCODED_TAGS = {
+  // No opening tags are defined yet.
+  OPENING: {},
+  CLOSED: {
+    // Bytes of the string `</head>`
+    HEAD: Uint8Array.of(60, 47, 104, 101, 97, 100, 62),
+    // Bytes of the string `</body>`
+    BODY: Uint8Array.of(60, 47, 98, 111, 100, 121, 62),
+    // Bytes of the string `</html>`
+    HTML: Uint8Array.of(60, 47, 104, 116, 109, 108, 62),
+    // Bytes of the string `</body></html>`
+    BODY_AND_HTML: Uint8Array.of(
+      60, 47, 98, 111, 100, 121, 62, 60, 47, 104, 116, 109, 108, 62
+    ),
+  },
+} as const
diff --git a/packages/next/src/server/stream-utils/node-web-streams-helper.ts b/packages/next/src/server/stream-utils/node-web-streams-helper.ts
@@ -2,6 +2,12 @@ import { getTracer } from '../lib/trace/tracer'
 import { AppRenderSpan } from '../lib/trace/constants'
 import { DetachedPromise } from '../../lib/detached-promise'
 import { scheduleImmediate, atLeastOneTask } from '../../lib/scheduler'
+import { ENCODED_TAGS } from './encodedTags'
+import {
+  indexOfUint8Array,
+  isEquivalentUint8Arrays,
+  removeFromUint8Array,
+} from './uint8array-helpers'
 
 function voidCatch() {
   // this catcher is designed to be used with pipeTo where we expect the underlying
@@ -175,8 +181,6 @@ function createHeadInsertionTransformStream(
   let inserted = false
   let freezing = false
 
-  const decoder = new TextDecoder()
-
   // We need to track if this transform saw any bytes because if it didn't
   // we won't want to insert any server HTML at all
   let hasBytes = false
@@ -191,17 +195,24 @@ function createHeadInsertionTransformStream(
       }
 
       const insertion = await insert()
+      const encodedInsertion = encoder.encode(insertion)
       if (inserted) {
-        controller.enqueue(encoder.encode(insertion))
+        controller.enqueue(encodedInsertion)
         controller.enqueue(chunk)
         freezing = true
       } else {
-        const content = decoder.decode(chunk)
-        const index = content.indexOf('</head>')
+        const index = indexOfUint8Array(chunk, ENCODED_TAGS.CLOSED.HEAD)
         if (index !== -1) {
-          const insertedHeadContent =
-            content.slice(0, index) + insertion + content.slice(index)
-          controller.enqueue(encoder.encode(insertedHeadContent))
+          const insertedHeadContent = new Uint8Array(
+            chunk.length + encodedInsertion.length
+          )
+          insertedHeadContent.set(chunk.slice(0, index))
+          insertedHeadContent.set(encodedInsertion, index)
+          insertedHeadContent.set(
+            chunk.slice(index),
+            index + encodedInsertion.length
+          )
+          controller.enqueue(insertedHeadContent)
           freezing = true
           inserted = true
         }
@@ -344,37 +355,34 @@ function createMoveSuffixStream(
 ): TransformStream<Uint8Array, Uint8Array> {
   let foundSuffix = false
 
-  const decoder = new TextDecoder()
+  const encodedSuffix = encoder.encode(suffix)
 
   return new TransformStream({
     transform(chunk, controller) {
       if (foundSuffix) {
         return controller.enqueue(chunk)
       }
 
-      const buf = decoder.decode(chunk)
-      const index = buf.indexOf(suffix)
+      const index = indexOfUint8Array(chunk, encodedSuffix)
       if (index > -1) {
         foundSuffix = true
 
         // If the whole chunk is the suffix, then don't write anything, it will
         // be written in the flush.
-        if (buf.length === suffix.length) {
+        if (chunk.length === suffix.length) {
           return
         }
 
         // Write out the part before the suffix.
-        const before = buf.slice(0, index)
-        chunk = encoder.encode(before)
-        controller.enqueue(chunk)
+        const before = chunk.slice(0, index)
+        controller.enqueue(before)
 
         // In the case where the suffix is in the middle of the chunk, we need
         // to split the chunk into two parts.
-        if (buf.length > suffix.length + index) {
+        if (chunk.length > suffix.length + index) {
           // Write out the part after the suffix.
-          const after = buf.slice(index + suffix.length)
-          chunk = encoder.encode(after)
-          controller.enqueue(chunk)
+          const after = chunk.slice(index + suffix.length)
+          controller.enqueue(after)
         }
       } else {
         controller.enqueue(chunk)
@@ -383,7 +391,7 @@ function createMoveSuffixStream(
     flush(controller) {
       // Even if we didn't find the suffix, the HTML is not valid if we don't
       // add it, so insert it at the end.
-      controller.enqueue(encoder.encode(suffix))
+      controller.enqueue(encodedSuffix)
     },
   })
 }
@@ -392,32 +400,31 @@ function createStripDocumentClosingTagsTransform(): TransformStream<
   Uint8Array,
   Uint8Array
 > {
-  const decoder = new TextDecoder()
   return new TransformStream({
     transform(chunk, controller) {
       // We rely on the assumption that chunks will never break across a code unit.
       // This is reasonable because we currently concat all of React's output from a single
       // flush into one chunk before streaming it forward which means the chunk will represent
       // a single coherent utf-8 string. This is not safe to use if we change our streaming to no
       // longer do this large buffered chunk
-      let originalContent = decoder.decode(chunk)
-      let content = originalContent
-
       if (
-        content === '</body></html>' ||
-        content === '</body>' ||
-        content === '</html>'
+        isEquivalentUint8Arrays(chunk, ENCODED_TAGS.CLOSED.BODY_AND_HTML) ||
+        isEquivalentUint8Arrays(chunk, ENCODED_TAGS.CLOSED.BODY) ||
+        isEquivalentUint8Arrays(chunk, ENCODED_TAGS.CLOSED.HTML)
       ) {
         // the entire chunk is the closing tags.
         return
       } else {
         // We assume these tags will go at together at the end of the document and that
         // they won't appear anywhere else in the document. This is not really a safe assumption
         // but until we revamp our streaming infra this is a performant way to string the tags
-        content = content.replace('</body>', '').replace('</html>', '')
-        if (content.length !== originalContent.length) {
-          return controller.enqueue(encoder.encode(content))
-        }
+        let transformed = removeFromUint8Array(chunk, ENCODED_TAGS.CLOSED.BODY)
+        transformed = removeFromUint8Array(
+          transformed,
+          ENCODED_TAGS.CLOSED.HTML
+        )
+
+        controller.enqueue(transformed)
       }
 
       controller.enqueue(chunk)

diff --git a/packages/next/src/server/stream-utils/uint8array-helpers.ts b/packages/next/src/server/stream-utils/uint8array-helpers.ts
@@ -0,0 +1,51 @@
+/**
+ * Locate the first position at which the byte sequence `b` occurs inside `a`.
+ *
+ * Returns the zero-based start offset of the match, or -1 when there is no
+ * match. An empty `a`, an empty `b`, or a `b` longer than `a` always yields -1.
+ */
+export function indexOfUint8Array(a: Uint8Array, b: Uint8Array) {
+  const needleLength = b.length
+  // Last offset in `a` at which `b` could still fit entirely.
+  const lastStart = a.length - needleLength
+
+  if (a.length === 0 || needleLength === 0 || lastStart < 0) return -1
+
+  // Try every candidate start offset; abandon a candidate on the first
+  // mismatching byte and move on to the next one.
+  search: for (let start = 0; start <= lastStart; start++) {
+    for (let offset = 0; offset < needleLength; offset++) {
+      if (a[start + offset] !== b[offset]) continue search
+    }
+
+    // Every byte of `b` matched at this offset.
+    return start
+  }
+
+  return -1
+}
+
+/**
+ * Report whether `a` and `b` have the same length and hold identical bytes
+ * at every position.
+ */
+export function isEquivalentUint8Arrays(a: Uint8Array, b: Uint8Array) {
+  // Arrays of different sizes can never be equivalent.
+  if (a.length !== b.length) return false
+
+  for (let i = 0; i < a.length; i++) {
+    if (a[i] !== b[i]) return false
+  }
+
+  return true
+}
+
+/**
+ * Remove the first occurrence of Uint8Array `b` from Uint8Array `a`.
+ *
+ * If `b` is not in `a` (which includes an empty `b` or a `b` longer than `a`),
+ * `a` is returned unchanged — the very same instance, not a copy.
+ *
+ * Otherwise, the function returns a new Uint8Array instance with size
+ * `a.length - b.length`; `a` itself is never mutated.
+ */
+export function removeFromUint8Array(a: Uint8Array, b: Uint8Array) {
+  const tagIndex = indexOfUint8Array(a, b)
+
+  // `indexOfUint8Array` reports "not found" as -1 and a match at the very
+  // start as 0, so the index must be compared explicitly: a truthiness check
+  // would treat -1 as a hit and 0 as a miss.
+  if (tagIndex === -1) {
+    return a
+  }
+
+  const removed = new Uint8Array(a.length - b.length)
+  // Copy the bytes before the match, then the bytes after it. `subarray`
+  // yields views over `a`, so each byte is copied only once (by `set`).
+  removed.set(a.subarray(0, tagIndex))
+  removed.set(a.subarray(tagIndex + b.length), tagIndex)
+  return removed
+}