From 521a3fdee9b0cf892e1c227cee554927c48c4740 Mon Sep 17 00:00:00 2001
From: Xu Xing <xing.xu@intel.com>
Date: Wed, 9 Nov 2022 14:15:48 +0800
Subject: [PATCH 1/9] [webgpu] create tensor from GPUBuffer

BUG: https://github.com/tensorflow/tfjs/issues/6232
---
 tfjs-backend-webgl/src/backend_webgl.ts       |   5 +-
 tfjs-backend-webgpu/src/backend_webgpu.ts     |  38 +++++-
 .../src/backend_webgpu_test.ts                | 118 ++++++++++++++++++
 tfjs-core/src/backends/backend.ts             |  10 +-
 tfjs-core/src/base.ts                         |   2 +-
 tfjs-core/src/ops/tensor.ts                   |  65 +++++++++-
 tfjs-core/src/ops/tensor_ops_util.ts          |  22 ++--
 tfjs-core/src/tensor_util_env.ts              |  18 ++-
 tfjs-core/src/types.ts                        |  10 ++
 tfjs-core/src/util_base.ts                    |   4 +-
 10 files changed, 264 insertions(+), 28 deletions(-)

diff --git a/tfjs-backend-webgl/src/backend_webgl.ts b/tfjs-backend-webgl/src/backend_webgl.ts
index b01b295e0f..3048694276 100644
--- a/tfjs-backend-webgl/src/backend_webgl.ts
+++ b/tfjs-backend-webgl/src/backend_webgl.ts
@@ -1304,8 +1304,9 @@ export class MathBackendWebGL extends KernelBackend {
    * Create a TF.js tensor out of an existing WebGL texture. A new texture will
    * be created.
    */
-  override createTensorFromTexture(values: WebGLData, shape: number[],
-      dtype: DataType): Tensor {
+  override createTensorFromGPUData(
+      values: WebGLData, shape: number[], dtype: DataType): Tensor {
+    values.channels = values.channels || 'RGBA';
     const {texture, height, width, channels} = values;
     const backend = engine().backend as MathBackendWebGL;
 
diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts
index 1b296a6b8d..44967371d7 100644
--- a/tfjs-backend-webgpu/src/backend_webgpu.ts
+++ b/tfjs-backend-webgpu/src/backend_webgpu.ts
@@ -17,7 +17,7 @@
 
 import './flags_webgpu';
 
-import {backend_util, buffer, DataStorage, DataType, engine, env, GPUData, KernelBackend, Rank, RecursiveArray, ShapeMap, TensorBuffer, TensorInfo, TimingInfo, TypedArray, util} from '@tensorflow/tfjs-core';
+import {backend_util, buffer, DataStorage, DataType, engine, env, GPUData, KernelBackend, Rank, RecursiveArray, ShapeMap, Tensor, TensorBuffer, TensorInfo, TimingInfo, TypedArray, util, WebGPUData} from '@tensorflow/tfjs-core';
 
 import {AdapterInfo} from './adapter_info';
 import {BufferManager} from './buffer_manager';
@@ -51,6 +51,8 @@ type TensorData = {
   shape: number[],
   refCount: number,
   resourceInfo?: BufferInfo|TextureInfo,
+  // Indicate the tensor is created from an external GPU resource.
+  external?: boolean,
   // For complex numbers, the real and imaginary parts are stored as their own
   // individual tensors, with a parent joining the two with the
   // complexTensorInfos field.
@@ -242,6 +244,11 @@ export class WebGPUBackend extends KernelBackend {
     if (!tensorData || !tensorData.resourceInfo) {
       return;
     }
+    // If tensor data is from external resource, do not release.
+    if (tensorData.external) {
+      tensorData.resourceInfo = null;
+      return;
+    }
     if ('texture' in tensorData.resourceInfo) {
       const textureInfo = tensorData.resourceInfo;
       if (textureInfo.texture instanceof GPUTexture) {
@@ -282,7 +289,8 @@ export class WebGPUBackend extends KernelBackend {
     }
   }
 
-  override write(values: backend_util.BackendValues, shape: number[],
+  override write(
+      values: backend_util.BackendValues, shape: number[],
       dtype: DataType): DataId {
     if (dtype === 'complex64' && values != null) {
       throw new Error(
@@ -437,6 +445,32 @@ export class WebGPUBackend extends KernelBackend {
     return vals;
   }
 
+  /**
+   * Create a TF.js tensor out of an existing WebGPU buffer.
+   */
+  override createTensorFromGPUData(
+      values: WebGPUData, shape: number[], dtype: DataType): Tensor {
+    const buffer = values.buffer;
+    if (dtype === 'complex64') {
+      throw new Error(`Cannot write to a complex64 dtype. `);
+    }
+    const dataId = {id: this.nextDataId()};
+    this.tensorMap.set(
+        dataId, {dtype, shape, values: null, refCount: 1, external: true});
+    const tensorData = this.tensorMap.get(dataId);
+    const sizeFromShape = util.sizeFromShape(tensorData.shape);
+    const size =
+        webgpu_util.GPUBytesPerElement(tensorData.dtype) * sizeFromShape;
+    if (values.size < sizeFromShape) {
+      throw new Error(`GPUBuffer size(${
+          values.size}) is smaller than tensor size(${sizeFromShape})!`);
+    }
+
+    tensorData
+        .resourceInfo = {size, usage: this.defaultGpuBufferUsage(), buffer};
+    return engine().makeTensorFromDataId(dataId, shape, dtype, this);
+  }
+
   /**
    * Read tensor to a new GPUBuffer.
    * @param dataId The source tensor.
diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts
index c34c9e69d7..04cee1ec29 100644
--- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts
+++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts
@@ -366,3 +366,121 @@ describeWebGPU('keeping data on gpu ', () => {
     expect(endDataBuckets).toEqual(startDataBuckets + 1);
   });
 });
+
+async function createReadonlyGPUBufferFromData(
+    device: GPUDevice, data: number[], dtype: tf.DataType) {
+  const bytesPerElement = 4;
+  const sizeInBytes = data.length * bytesPerElement;
+
+  const gpuWriteBuffer = device.createBuffer({
+    mappedAtCreation: true,
+    size: sizeInBytes,
+    usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC
+  });
+  const arrayBuffer = gpuWriteBuffer.getMappedRange();
+  if (dtype === 'float32') {
+    new Float32Array(arrayBuffer).set(data);
+  } else if (dtype === 'int32') {
+    new Int32Array(arrayBuffer).set(data);
+  } else {
+    throw new Error(
+        `Creating tensor from GPUBuffer only supports` +
+        `'float32'|'int32' dtype, while the dtype is ${dtype}.`);
+  }
+  gpuWriteBuffer.unmap();
+
+  const gpuReadBuffer = device.createBuffer({
+    mappedAtCreation: false,
+    size: sizeInBytes,
+    usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
+  });
+
+  const copyEncoder = device.createCommandEncoder();
+  copyEncoder.copyBufferToBuffer(
+      gpuWriteBuffer, 0, gpuReadBuffer, 0, sizeInBytes);
+  const copyCommands = copyEncoder.finish();
+  device.queue.submit([copyCommands]);
+  gpuWriteBuffer.destroy();
+  return gpuReadBuffer;
+}
+
+async function testCreateTensorFromGPUBuffer(
+    dtype: tf.DataType, useDefaultShapeAndType = false) {
+  const webGPUBackend = tf.backend() as WebGPUBackend;
+  const device = webGPUBackend.device;
+  const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+  const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4];
+  const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20];
+  const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype);
+  const shape: number[] = [aData.length];
+  const startNumBytes = tf.memory().numBytes;
+  const startNumTensors = tf.memory().numTensors;
+  const a = useDefaultShapeAndType ?
+      tf.tensor({buffer: aBuffer, size: aData.length}) :
+      tf.tensor({buffer: aBuffer, size: aData.length}, shape, dtype);
+  const b = tf.tensor(bData, shape, dtype);
+  const result = tf.add(a, b);
+  tf.test_util.expectArraysClose(await result.data(), expected);
+  a.dispose();
+  b.dispose();
+  result.dispose();
+  const endNumBytes = tf.memory().numBytes;
+  const endNumTensors = tf.memory().numTensors;
+  expect(endNumBytes - startNumBytes).toEqual(0);
+  expect(endNumTensors - startNumTensors).toEqual(0);
+  aBuffer.destroy();
+}
+
+describeWebGPU('create tensor from GPUBuffer', () => {
+  it('use default shape and data type(float32)', async () => {
+    await testCreateTensorFromGPUBuffer('float32', true);
+  });
+
+  it('work for float32', async () => {
+    await testCreateTensorFromGPUBuffer('float32');
+  });
+
+  it('work for int32', async () => {
+    await testCreateTensorFromGPUBuffer('int32');
+  });
+
+  it('throw when size is not set or incorrect', async () => {
+    const webGPUBackend = tf.backend() as WebGPUBackend;
+    const device = webGPUBackend.device;
+    const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+    const dtype = 'float32';
+    const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype);
+    const shape: number[] = [aData.length];
+    const a = () => tf.tensor({buffer: aBuffer}, shape, dtype);
+    expect(a).toThrowError();
+    const b = () => tf.tensor({buffer: aBuffer, size: 0}, shape, dtype);
+    expect(b).toThrowError();
+    aBuffer.destroy();
+  });
+
+  it('two tensors share the same GPUBuffer', async () => {
+    const webGPUBackend = tf.backend() as WebGPUBackend;
+    const device = webGPUBackend.device;
+    const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+    const dtype = 'float32';
+    const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype);
+    const startNumBytes = tf.memory().numBytes;
+    const startNumTensors = tf.memory().numTensors;
+    const shape: number[] = [aData.length];
+    const size = aData.length * 4;
+    const a = tf.tensor({buffer: aBuffer, size}, shape, dtype);
+    const b = tf.tensor({buffer: aBuffer, size}, shape, dtype);
+    const result = tf.add(a, b);
+    const expected =
+        [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32];
+    tf.test_util.expectArraysClose(await result.data(), expected);
+    a.dispose();
+    b.dispose();
+    result.dispose();
+    const endNumBytes = tf.memory().numBytes;
+    const endNumTensors = tf.memory().numTensors;
+    expect(endNumBytes - startNumBytes).toEqual(0);
+    expect(endNumTensors - startNumTensors).toEqual(0);
+    aBuffer.destroy();
+  });
+});
diff --git a/tfjs-core/src/backends/backend.ts b/tfjs-core/src/backends/backend.ts
index 33626bd31d..6f23ec0a3e 100644
--- a/tfjs-core/src/backends/backend.ts
+++ b/tfjs-core/src/backends/backend.ts
@@ -17,7 +17,7 @@
 
 import {Backend, DataToGPUOptions, GPUData, Tensor} from '../tensor';
 import {DataId} from '../tensor_info';
-import {BackendValues, DataType, WebGLData} from '../types';
+import {BackendValues, DataType, WebGLData, WebGPUData} from '../types';
 
 export const EPSILON_FLOAT32 = 1e-7;
 export const EPSILON_FLOAT16 = 1e-4;
@@ -133,10 +133,12 @@ export class KernelBackend implements TensorStorage, Backend, BackendTimer {
       refCount: number): void {
     return notYetImplemented('move');
   }
-  createTensorFromTexture(values: WebGLData, shape: number[], dtype: DataType):
-      Tensor {
-    return notYetImplemented('createTensorFromTexture');
+
+  createTensorFromGPUData(
+      values: WebGLData|WebGPUData, shape: number[], dtype: DataType): Tensor {
+    return notYetImplemented('createTensorFromGPUData');
   }
+
   memory(): {unreliable: boolean; reasons?: string[]} {
     return notYetImplemented('memory');
   }
diff --git a/tfjs-core/src/base.ts b/tfjs-core/src/base.ts
index c8a5ef7419..e869ede5d2 100644
--- a/tfjs-core/src/base.ts
+++ b/tfjs-core/src/base.ts
@@ -55,7 +55,7 @@ export {RMSPropOptimizer} from './optimizers/rmsprop_optimizer';
 export {SGDOptimizer} from './optimizers/sgd_optimizer';
 export {DataToGPUOptions, DataToGPUWebGLOption, GPUData, Scalar, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D, TensorBuffer, Variable} from './tensor';
 export {GradSaveFunc, NamedTensorMap, TensorContainer, TensorContainerArray, TensorContainerObject} from './tensor_types';
-export {BackendValues, DataType, DataTypeMap, DataValues, NumericDataType, PixelData, Rank, RecursiveArray, ScalarLike, ShapeMap, sumOutType, TensorLike, TypedArray, upcastType, WebGLData} from './types';
+export {BackendValues, DataType, DataTypeMap, DataValues, NumericDataType, PixelData, Rank, RecursiveArray, ScalarLike, ShapeMap, sumOutType, TensorLike, TypedArray, upcastType, WebGLData, WebGPUData} from './types';
 
 export * from './ops/ops';
 export {Reduction} from './ops/loss_ops_utils';
diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts
index cf9933251c..507454ee41 100644
--- a/tfjs-core/src/ops/tensor.ts
+++ b/tfjs-core/src/ops/tensor.ts
@@ -18,7 +18,7 @@
 import {Tensor} from '../tensor';
 import {inferShape} from '../tensor_util_env';
 import {TensorLike} from '../types';
-import {DataType, Rank, ShapeMap, WebGLData} from '../types';
+import {DataType, Rank, ShapeMap, WebGLData, WebGPUData} from '../types';
 
 import {makeTensor} from './tensor_ops_util';
 
@@ -92,6 +92,67 @@ import {makeTensor} from './tensor_ops_util';
  *
  * const tex = a.dataToGPU();
  * ```
+ *
+ * ```js
+ * // Pass a `WebGPUData` object and specify a shape yourself.
+ *
+ * // This makes it possible for TF.js applications to avoid GPU / CPU sync.
+ * // For example, if your application includes a preprocessing step on the GPU,
+ * // you could upload the GPU output directly to TF.js, rather than first
+ * // downloading the values.
+ *
+ * // Example for WebGPU:
+ * async function createReadonlyGPUBufferFromData(device, data, dtype) {
+ *   const bytesPerElement = 4;
+ *   const sizeInBytes = data.length * bytesPerElement;
+ *
+ *   const gpuWriteBuffer = device.createBuffer({
+ *     mappedAtCreation: true,
+ *     size: sizeInBytes,
+ *     usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC
+ *   });
+ *   const arrayBuffer = gpuWriteBuffer.getMappedRange();
+ *   if (dtype === 'float32') {
+ *     new Float32Array(arrayBuffer).set(data);
+ *   } else if (dtype === 'int32') {
+ *     new Int32Array(arrayBuffer).set(data);
+ *   } else {
+ *     throw new Error(
+ *         `Creating tensor from GPUBuffer only supports` +
+ *         `'float32'|'int32' dtype, while the dtype is ${dtype}.`);
+ *   }
+ *   gpuWriteBuffer.unmap();
+ *
+ *   const gpuReadBuffer = device.createBuffer({
+ *     mappedAtCreation: false,
+ *     size: sizeInBytes,
+ *     usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
+ *   });
+ *
+ *   const copyEncoder = device.createCommandEncoder();
+ *   copyEncoder.copyBufferToBuffer(
+ *       gpuWriteBuffer, 0, gpuReadBuffer, 0, sizeInBytes);
+ *   const copyCommands = copyEncoder.finish();
+ *   device.queue.submit([copyCommands]);
+ *   gpuWriteBuffer.destroy();
+ *   return gpuReadBuffer;
+ * }
+ *
+ * const dtype = 'float32';
+ * const device = tf.backend().device;
+ * const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+ * const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4];
+ * const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20];
+ * const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype);
+ * const shape = [aData.length];
+ * const a = tf.tensor({buffer: aBuffer, size: aData.length});
+ * const b = tf.tensor(bData, shape, dtype);
+ * const result = tf.add(a, b);
+ * a.dispose();
+ * b.dispose();
+ * result.dispose();
+ * aBuffer.destroy();
+ * ```
  * @param values The values of the tensor. Can be nested array of numbers,
  *     or a flat array, or a `TypedArray`, or a `WebGLData` object. If the
  * values are strings, they will be encoded as utf-8 and kept as `Uint8Array[]`.
@@ -113,7 +174,7 @@ import {makeTensor} from './tensor_ops_util';
  * @doc {heading: 'Tensors', subheading: 'Creation'}
  */
 export function tensor<R extends Rank>(
-    values: TensorLike|WebGLData, shape?: ShapeMap[R],
+    values: TensorLike|WebGLData|WebGPUData, shape?: ShapeMap[R],
     dtype?: DataType): Tensor<R> {
   const inferredShape = inferShape(values, dtype);
   return makeTensor(values, shape, inferredShape, dtype) as Tensor<R>;
diff --git a/tfjs-core/src/ops/tensor_ops_util.ts b/tfjs-core/src/ops/tensor_ops_util.ts
index 1b497d4ceb..d72a11317e 100644
--- a/tfjs-core/src/ops/tensor_ops_util.ts
+++ b/tfjs-core/src/ops/tensor_ops_util.ts
@@ -17,32 +17,34 @@
 
 import {ENGINE} from '../engine';
 import {Tensor} from '../tensor';
-import {TensorLike, TypedArray, WebGLData} from '../types';
+import {TensorLike, TypedArray, WebGLData, WebGPUData} from '../types';
 import {DataType} from '../types';
 import {assert, assertNonNegativeIntegerDimensions, flatten, inferDtype, isTypedArray, sizeFromShape, toTypedArray} from '../util';
 
 /** This is shared code across all tensor creation methods. */
 export function makeTensor(
-    values: TensorLike|WebGLData, shape: number[], inferredShape: number[],
-    dtype?: DataType): Tensor {
+    values: TensorLike|WebGLData|WebGPUData, shape: number[],
+    inferredShape: number[], dtype?: DataType): Tensor {
   if (dtype == null) {
     dtype = inferDtype(values);
-  }
-  if (dtype === 'complex64') {
+  } else if (dtype === 'complex64') {
     throw new Error(
         `Cannot construct a complex64 tensor directly. ` +
         `Please use tf.complex(real, imag).`);
   }
-  if (typeof values === 'object' && 'texture' in values) {
+
+  if (typeof values === 'object' &&
+      (('texture' in values && values.texture instanceof WebGLTexture) ||
+       ('buffer' in values && values.buffer instanceof GPUBuffer))) {
     if (dtype !== 'float32' && dtype !== 'int32') {
       throw new Error(
-          `Creating tensor from texture only supports ` +
+          `Creating tensor from GPU data only supports ` +
           `'float32'|'int32' dtype, while the dtype is ${dtype}.`);
     }
-    values.channels = values.channels || 'RGBA';
-    return ENGINE.backend.createTensorFromTexture(
-        values, shape || inferredShape, dtype);
+    return ENGINE.backend.createTensorFromGPUData(
+        values as WebGLData | WebGPUData, shape || inferredShape, dtype);
   }
+
   if (!isTypedArray(values) && !Array.isArray(values) &&
       typeof values !== 'number' && typeof values !== 'boolean' &&
       typeof values !== 'string') {
diff --git a/tfjs-core/src/tensor_util_env.ts b/tfjs-core/src/tensor_util_env.ts
index 139257d491..c7705697f6 100644
--- a/tfjs-core/src/tensor_util_env.ts
+++ b/tfjs-core/src/tensor_util_env.ts
@@ -18,19 +18,27 @@
 import {ENGINE} from './engine';
 import {env} from './environment';
 import {Tensor} from './tensor';
-import {DataType, TensorLike, WebGLData} from './types';
+import {DataType, TensorLike, WebGLData, WebGPUData} from './types';
 import {assert, flatten, inferDtype, isTypedArray, toTypedArray} from './util';
 
 export function inferShape(
-    val: TensorLike|WebGLData, dtype?: DataType): number[] {
+    val: TensorLike|WebGLData|WebGPUData, dtype?: DataType): number[] {
   let firstElem: typeof val = val;
 
   if (isTypedArray(val)) {
     return dtype === 'string' ? [] : [val.length];
   }
-  if (typeof val === 'object' && 'texture' in val) {
-    const usedChannels = val.channels || 'RGBA';
-    return [val.height, val.width * usedChannels.length];
+  const isObject = typeof val === 'object';
+  if (isObject) {
+    if ('texture' in val && val.texture instanceof WebGLTexture) {
+      const usedChannels = val.channels || 'RGBA';
+      return [val.height, val.width * usedChannels.length];
+    } else if ('buffer' in val && val.buffer instanceof GPUBuffer) {
+      if (val.size == null) {
+        throw new Error('size should be defined in WebGPUData!');
+      }
+      return [val.size];
+    }
   }
   if (!Array.isArray(val)) {
     return [];  // Scalar.
diff --git a/tfjs-core/src/types.ts b/tfjs-core/src/types.ts
index 7e416e3c81..5bcb4231f6 100644
--- a/tfjs-core/src/types.ts
+++ b/tfjs-core/src/types.ts
@@ -182,3 +182,13 @@ export interface WebGLData {
   width: number;
   channels: WebGLChannels;
 }
+
+/**
+ * Type representing the GPUBuffer from which a tensor is created. Use the
+ * default usage GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC. If no dtype
+ * is specified when creating the tensor, the tensor dtype is float32.
+ */
+export interface WebGPUData {
+  buffer: GPUBuffer;
+  size: number;
+}
diff --git a/tfjs-core/src/util_base.ts b/tfjs-core/src/util_base.ts
index f4a6f32d22..132cc713d3 100644
--- a/tfjs-core/src/util_base.ts
+++ b/tfjs-core/src/util_base.ts
@@ -15,7 +15,7 @@
  * =============================================================================
  */
 
-import {DataType, DataTypeMap, FlatVector, NumericDataType, RecursiveArray, TensorLike, TypedArray, WebGLData} from './types';
+import {DataType, DataTypeMap, FlatVector, NumericDataType, RecursiveArray, TensorLike, TypedArray, WebGLData, WebGPUData} from './types';
 
 /**
  * Shuffles the array in-place using Fisher-Yates algorithm.
@@ -559,7 +559,7 @@ export function isNumber(value: {}): boolean {
   return typeof value === 'number';
 }
 
-export function inferDtype(values: TensorLike|WebGLData): DataType {
+export function inferDtype(values: TensorLike|WebGLData|WebGPUData): DataType {
   if (Array.isArray(values)) {
     return inferDtype(values[0]);
   }

From 2245c963db55444a5bfec26725b402cd4175dcac Mon Sep 17 00:00:00 2001
From: Xu Xing <xing.xu@intel.com>
Date: Fri, 11 Nov 2022 13:17:53 +0800
Subject: [PATCH 2/9] Fix CPU failure: avoid instanceof WebGLTexture/GPUBuffer

---
 tfjs-core/src/ops/tensor_ops_util.ts | 4 ++--
 tfjs-core/src/tensor_util_env.ts     | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tfjs-core/src/ops/tensor_ops_util.ts b/tfjs-core/src/ops/tensor_ops_util.ts
index d72a11317e..197ccf1e30 100644
--- a/tfjs-core/src/ops/tensor_ops_util.ts
+++ b/tfjs-core/src/ops/tensor_ops_util.ts
@@ -34,8 +34,8 @@ export function makeTensor(
   }
 
   if (typeof values === 'object' &&
-      (('texture' in values && values.texture instanceof WebGLTexture) ||
-       ('buffer' in values && values.buffer instanceof GPUBuffer))) {
+      ('texture' in values ||
+       ('buffer' in values && !(values.buffer instanceof ArrayBuffer)))) {
     if (dtype !== 'float32' && dtype !== 'int32') {
       throw new Error(
           `Creating tensor from GPU data only supports ` +
diff --git a/tfjs-core/src/tensor_util_env.ts b/tfjs-core/src/tensor_util_env.ts
index c7705697f6..688c2bc73f 100644
--- a/tfjs-core/src/tensor_util_env.ts
+++ b/tfjs-core/src/tensor_util_env.ts
@@ -30,10 +30,10 @@ export function inferShape(
   }
   const isObject = typeof val === 'object';
   if (isObject) {
-    if ('texture' in val && val.texture instanceof WebGLTexture) {
+    if ('texture' in val) {
       const usedChannels = val.channels || 'RGBA';
       return [val.height, val.width * usedChannels.length];
-    } else if ('buffer' in val && val.buffer instanceof GPUBuffer) {
+    } else if ('buffer' in val && !(val.buffer instanceof ArrayBuffer)) {
       if (val.size == null) {
         throw new Error('size should be defined in WebGPUData!');
       }

From d6d7101c9bc2c38f29d9236a66baf600bd79e10e Mon Sep 17 00:00:00 2001
From: Xu Xing <xing.xu@intel.com>
Date: Fri, 11 Nov 2022 13:58:55 +0800
Subject: [PATCH 3/9] Remove size from WebGPUData; use GPUBuffer.size

---
 tfjs-backend-webgpu/src/backend_webgpu.ts     |  4 ++--
 .../src/backend_webgpu_test.ts                | 24 ++++---------------
 tfjs-core/src/ops/tensor.ts                   |  2 +-
 tfjs-core/src/tensor_util_env.ts              |  6 ++---
 tfjs-core/src/types.ts                        |  1 -
 5 files changed, 9 insertions(+), 28 deletions(-)

diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts
index 44967371d7..81a5eef67a 100644
--- a/tfjs-backend-webgpu/src/backend_webgpu.ts
+++ b/tfjs-backend-webgpu/src/backend_webgpu.ts
@@ -461,9 +461,9 @@ export class WebGPUBackend extends KernelBackend {
     const sizeFromShape = util.sizeFromShape(tensorData.shape);
     const size =
         webgpu_util.GPUBytesPerElement(tensorData.dtype) * sizeFromShape;
-    if (values.size < sizeFromShape) {
+    if (values.buffer.size < sizeFromShape) {
       throw new Error(`GPUBuffer size(${
-          values.size}) is smaller than tensor size(${sizeFromShape})!`);
+          values.buffer.size}) is smaller than tensor size(${sizeFromShape})!`);
     }
 
     tensorData
diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts
index 04cee1ec29..5f2ef22c62 100644
--- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts
+++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts
@@ -415,9 +415,8 @@ async function testCreateTensorFromGPUBuffer(
   const shape: number[] = [aData.length];
   const startNumBytes = tf.memory().numBytes;
   const startNumTensors = tf.memory().numTensors;
-  const a = useDefaultShapeAndType ?
-      tf.tensor({buffer: aBuffer, size: aData.length}) :
-      tf.tensor({buffer: aBuffer, size: aData.length}, shape, dtype);
+  const a = useDefaultShapeAndType ? tf.tensor({buffer: aBuffer}) :
+                                     tf.tensor({buffer: aBuffer}, shape, dtype);
   const b = tf.tensor(bData, shape, dtype);
   const result = tf.add(a, b);
   tf.test_util.expectArraysClose(await result.data(), expected);
@@ -444,20 +443,6 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     await testCreateTensorFromGPUBuffer('int32');
   });
 
-  it('throw when size is not set or incorrect', async () => {
-    const webGPUBackend = tf.backend() as WebGPUBackend;
-    const device = webGPUBackend.device;
-    const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-    const dtype = 'float32';
-    const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype);
-    const shape: number[] = [aData.length];
-    const a = () => tf.tensor({buffer: aBuffer}, shape, dtype);
-    expect(a).toThrowError();
-    const b = () => tf.tensor({buffer: aBuffer, size: 0}, shape, dtype);
-    expect(b).toThrowError();
-    aBuffer.destroy();
-  });
-
   it('two tensors share the same GPUBuffer', async () => {
     const webGPUBackend = tf.backend() as WebGPUBackend;
     const device = webGPUBackend.device;
@@ -467,9 +452,8 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     const startNumBytes = tf.memory().numBytes;
     const startNumTensors = tf.memory().numTensors;
     const shape: number[] = [aData.length];
-    const size = aData.length * 4;
-    const a = tf.tensor({buffer: aBuffer, size}, shape, dtype);
-    const b = tf.tensor({buffer: aBuffer, size}, shape, dtype);
+    const a = tf.tensor({buffer: aBuffer}, shape, dtype);
+    const b = tf.tensor({buffer: aBuffer}, shape, dtype);
     const result = tf.add(a, b);
     const expected =
         [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32];
diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts
index 507454ee41..37f5a05fb9 100644
--- a/tfjs-core/src/ops/tensor.ts
+++ b/tfjs-core/src/ops/tensor.ts
@@ -145,7 +145,7 @@ import {makeTensor} from './tensor_ops_util';
  * const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20];
  * const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype);
  * const shape = [aData.length];
- * const a = tf.tensor({buffer: aBuffer, size: aData.length});
+ * const a = tf.tensor({buffer: aBuffer}, shape, dtype);
  * const b = tf.tensor(bData, shape, dtype);
  * const result = tf.add(a, b);
  * a.dispose();
diff --git a/tfjs-core/src/tensor_util_env.ts b/tfjs-core/src/tensor_util_env.ts
index 688c2bc73f..e7be429742 100644
--- a/tfjs-core/src/tensor_util_env.ts
+++ b/tfjs-core/src/tensor_util_env.ts
@@ -20,6 +20,7 @@ import {env} from './environment';
 import {Tensor} from './tensor';
 import {DataType, TensorLike, WebGLData, WebGPUData} from './types';
 import {assert, flatten, inferDtype, isTypedArray, toTypedArray} from './util';
+import {bytesPerElement} from './util_base';
 
 export function inferShape(
     val: TensorLike|WebGLData|WebGPUData, dtype?: DataType): number[] {
@@ -34,10 +35,7 @@ export function inferShape(
       const usedChannels = val.channels || 'RGBA';
       return [val.height, val.width * usedChannels.length];
     } else if ('buffer' in val && !(val.buffer instanceof ArrayBuffer)) {
-      if (val.size == null) {
-        throw new Error('size should be defined in WebGPUData!');
-      }
-      return [val.size];
+      return [val.buffer.size / (dtype == null ? 4 : bytesPerElement(dtype))];
     }
   }
   if (!Array.isArray(val)) {
diff --git a/tfjs-core/src/types.ts b/tfjs-core/src/types.ts
index 5bcb4231f6..5c38df4c38 100644
--- a/tfjs-core/src/types.ts
+++ b/tfjs-core/src/types.ts
@@ -190,5 +190,4 @@ export interface WebGLData {
  */
 export interface WebGPUData {
   buffer: GPUBuffer;
-  size: number;
 }

From 2f92be58be8f5565307767ed8a0937fd40a56e74 Mon Sep 17 00:00:00 2001
From: Xu Xing <xing.xu@intel.com>
Date: Tue, 15 Nov 2022 15:01:02 +0800
Subject: [PATCH 4/9] Add test case

---
 tfjs-backend-webgpu/src/backend_webgpu.ts     | 12 +++---
 .../src/backend_webgpu_test.ts                | 39 +++++++++++++++++++
 tfjs-core/src/ops/tensor.ts                   |  4 +-
 3 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts
index 81a5eef67a..a79ae32cd1 100644
--- a/tfjs-backend-webgpu/src/backend_webgpu.ts
+++ b/tfjs-backend-webgpu/src/backend_webgpu.ts
@@ -458,16 +458,14 @@ export class WebGPUBackend extends KernelBackend {
     this.tensorMap.set(
         dataId, {dtype, shape, values: null, refCount: 1, external: true});
     const tensorData = this.tensorMap.get(dataId);
-    const sizeFromShape = util.sizeFromShape(tensorData.shape);
-    const size =
-        webgpu_util.GPUBytesPerElement(tensorData.dtype) * sizeFromShape;
-    if (values.buffer.size < sizeFromShape) {
+    const size = webgpu_util.GPUBytesPerElement(tensorData.dtype) *
+        util.sizeFromShape(tensorData.shape);
+    if (values.buffer.size < size) {
       throw new Error(`GPUBuffer size(${
-          values.buffer.size}) is smaller than tensor size(${sizeFromShape})!`);
+          values.buffer.size}) is smaller than tensor size(${size})!`);
     }
 
-    tensorData
-        .resourceInfo = {size, usage: this.defaultGpuBufferUsage(), buffer};
+    tensorData.resourceInfo = {size: buffer.size, usage: buffer.usage, buffer};
     return engine().makeTensorFromDataId(dataId, shape, dtype, this);
   }
 
diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts
index 5f2ef22c62..ec6c5298bf 100644
--- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts
+++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts
@@ -467,4 +467,43 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     expect(endNumTensors - startNumTensors).toEqual(0);
     aBuffer.destroy();
   });
+
+  it('GPUBuffer size is bigger than tensor size', async () => {
+    const webGPUBackend = tf.backend() as WebGPUBackend;
+    const device = webGPUBackend.device;
+    const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+    const dtype = 'float32';
+    const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype);
+    const startNumBytes = tf.memory().numBytes;
+    const startNumTensors = tf.memory().numTensors;
+    // GPUBuffer.size is bigger than shape size
+    const shape: number[] = [aData.length - 1];
+    const a = tf.tensor({buffer: aBuffer}, shape, dtype);
+    const b = tf.tensor({buffer: aBuffer}, shape, dtype);
+    const result = tf.add(a, b);
+    const expected = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30];
+    console.log(await result.data());
+    tf.test_util.expectArraysClose(await result.data(), expected);
+    a.dispose();
+    b.dispose();
+    result.dispose();
+    const endNumBytes = tf.memory().numBytes;
+    const endNumTensors = tf.memory().numTensors;
+    expect(endNumBytes - startNumBytes).toEqual(0);
+    expect(endNumTensors - startNumTensors).toEqual(0);
+    aBuffer.destroy();
+  });
+
+  it('throw for GPUBuffer size is smaller than tensor size', async () => {
+    const webGPUBackend = tf.backend() as WebGPUBackend;
+    const device = webGPUBackend.device;
+    const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+    const dtype = 'float32';
+    const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype);
+    // Throw when GPUBuffer.size is smaller than shape size
+    const shape: number[] = [aData.length + 1];
+    const a = () => tf.tensor({buffer: aBuffer}, shape, dtype);
+    expect(a).toThrowError();
+    aBuffer.destroy();
+  });
 });
diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts
index 37f5a05fb9..82fcf2e01c 100644
--- a/tfjs-core/src/ops/tensor.ts
+++ b/tfjs-core/src/ops/tensor.ts
@@ -99,7 +99,9 @@ import {makeTensor} from './tensor_ops_util';
  * // This makes it possible for TF.js applications to avoid GPU / CPU sync.
  * // For example, if your application includes a preprocessing step on the GPU,
  * // you could upload the GPU output directly to TF.js, rather than first
- * // downloading the values.
+ * // downloading the values. Unlike WebGL, to support zero copy, this GPUBuffer
+ * // is bound directly by the tensor. So donot destroy this GPUBuffer until all
+ * // access are done.
  *
  * // Example for WebGPU:
  * async function createReadonlyGPUBufferFromData(device, data, dtype) {

From fcd7a22c8f6cf6b25d075def7f60c15d4f6ec9e3 Mon Sep 17 00:00:00 2001
From: Xu Xing <xing.xu@intel.com>
Date: Tue, 15 Nov 2022 15:49:25 +0800
Subject: [PATCH 5/9] Remove async and fix comments

---
 .../src/backend_webgpu_test.ts                | 11 +++----
 tfjs-core/src/ops/tensor.ts                   | 33 ++++++++++---------
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts
index ec6c5298bf..6367528a6f 100644
--- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts
+++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts
@@ -367,7 +367,7 @@ describeWebGPU('keeping data on gpu ', () => {
   });
 });
 
-async function createReadonlyGPUBufferFromData(
+function createReadonlyGPUBufferFromData(
     device: GPUDevice, data: number[], dtype: tf.DataType) {
   const bytesPerElement = 4;
   const sizeInBytes = data.length * bytesPerElement;
@@ -411,7 +411,7 @@ async function testCreateTensorFromGPUBuffer(
   const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
   const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4];
   const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20];
-  const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype);
+  const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
   const shape: number[] = [aData.length];
   const startNumBytes = tf.memory().numBytes;
   const startNumTensors = tf.memory().numTensors;
@@ -448,7 +448,7 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     const device = webGPUBackend.device;
     const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
     const dtype = 'float32';
-    const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype);
+    const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
     const startNumBytes = tf.memory().numBytes;
     const startNumTensors = tf.memory().numTensors;
     const shape: number[] = [aData.length];
@@ -473,7 +473,7 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     const device = webGPUBackend.device;
     const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
     const dtype = 'float32';
-    const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype);
+    const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
     const startNumBytes = tf.memory().numBytes;
     const startNumTensors = tf.memory().numTensors;
     // GPUBuffer.size is bigger than shape size
@@ -482,7 +482,6 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     const b = tf.tensor({buffer: aBuffer}, shape, dtype);
     const result = tf.add(a, b);
     const expected = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30];
-    console.log(await result.data());
     tf.test_util.expectArraysClose(await result.data(), expected);
     a.dispose();
     b.dispose();
@@ -499,7 +498,7 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     const device = webGPUBackend.device;
     const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
     const dtype = 'float32';
-    const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype);
+    const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
     // Throw when GPUBuffer.size is smaller than shape size
     const shape: number[] = [aData.length + 1];
     const a = () => tf.tensor({buffer: aBuffer}, shape, dtype);
diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts
index 82fcf2e01c..80e7569265 100644
--- a/tfjs-core/src/ops/tensor.ts
+++ b/tfjs-core/src/ops/tensor.ts
@@ -104,7 +104,7 @@ import {makeTensor} from './tensor_ops_util';
  * // access are done.
  *
  * // Example for WebGPU:
- * async function createReadonlyGPUBufferFromData(device, data, dtype) {
+ * function createReadonlyGPUBufferFromData(device, data, dtype) {
  *   const bytesPerElement = 4;
  *   const sizeInBytes = data.length * bytesPerElement;
  *
@@ -145,7 +145,7 @@ import {makeTensor} from './tensor_ops_util';
  * const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
  * const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4];
  * const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20];
- * const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype);
+ * const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
  * const shape = [aData.length];
  * const a = tf.tensor({buffer: aBuffer}, shape, dtype);
  * const b = tf.tensor(bData, shape, dtype);
@@ -156,19 +156,22 @@ import {makeTensor} from './tensor_ops_util';
  * aBuffer.destroy();
  * ```
  * @param values The values of the tensor. Can be nested array of numbers,
- *     or a flat array, or a `TypedArray`, or a `WebGLData` object. If the
- * values are strings, they will be encoded as utf-8 and kept as `Uint8Array[]`.
- * If the values is a `WebGLData` object, the dtype could only be 'float32' or
- * 'int32' and the object has to have: 1. texture, a `WebGLTexture`, the texture
- * must share the same `WebGLRenderingContext` with TFJS's WebGL backend (you
- * could create a custom WebGL backend from your texture's canvas) and the
- * internal texture format for the input texture must be floating point or
- * normalized integer; 2. height, the height of the texture; 3. width, the width
- * of the texture; 4. channels, a non-empty subset of 'RGBA', indicating the
- * values of which channels will be passed to the tensor, such as 'R' or 'BR'
- * (The order of the channels affect the order of tensor values. ). (If the
- * values passed from texture is less than the tensor size, zeros will be padded
- * at the rear.)
+ *     or a flat array, or a `TypedArray`, or a `WebGLData` object, or a
+ * `WebGPUData` object. If the values are strings, they will be encoded as utf-8
+ * and kept as `Uint8Array[]`. If the values is a `WebGLData` object, the dtype
+ * could only be 'float32' or 'int32' and the object has to have: 1. texture, a
+ * `WebGLTexture`, the texture must share the same `WebGLRenderingContext` with
+ * TFJS's WebGL backend (you could create a custom WebGL backend from your
+ * texture's canvas) and the internal texture format for the input texture must
+ * be floating point or normalized integer; 2. height, the height of the
+ * texture; 3. width, the width of the texture; 4. channels, a non-empty subset
+ * of 'RGBA', indicating the values of which channels will be passed to the
+ * tensor, such as 'R' or 'BR' (The order of the channels affect the order of
+ * tensor values. ). (If the values passed from texture is less than the tensor
+ * size, zeros will be padded at the rear.). If the values is a `WebGPUData`
+ * object, the dtype could only be 'float32' or 'int32 and the object has to
+ * have: buffer, a `GPUBuffer`, the buffer must share the same `GPUDevice` with
+ * TFJS's WebGPU backend.
  * @param shape The shape of the tensor. Optional. If not provided,
  *   it is inferred from `values`.
  * @param dtype The data type.

From 76ccb5a4d1fc94e1de317322351e6fc4bb48b097 Mon Sep 17 00:00:00 2001
From: Xu Xing <xing.xu@intel.com>
Date: Tue, 15 Nov 2022 16:47:46 +0800
Subject: [PATCH 6/9] Add read case and usage check

---
 tfjs-backend-webgpu/src/backend_webgpu.ts     |  4 ++
 .../src/backend_webgpu_test.ts                | 65 +++++++++++++++++--
 tfjs-core/src/ops/tensor.ts                   |  7 +-
 3 files changed, 67 insertions(+), 9 deletions(-)

diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts
index a79ae32cd1..7aa8ddfd3e 100644
--- a/tfjs-backend-webgpu/src/backend_webgpu.ts
+++ b/tfjs-backend-webgpu/src/backend_webgpu.ts
@@ -463,6 +463,10 @@ export class WebGPUBackend extends KernelBackend {
     if (values.buffer.size < size) {
       throw new Error(`GPUBuffer size(${
           values.buffer.size}) is smaller than tensor size(${size})!`);
+    } else if (
+        (values.buffer.usage & GPUBufferUsage.STORAGE) !==
+        GPUBufferUsage.STORAGE) {
+      throw new Error('GPUBuffer.usage should include GPUBufferUsage.STORAGE!');
     }
 
     tensorData.resourceInfo = {size: buffer.size, usage: buffer.usage, buffer};
diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts
index 6367528a6f..0d1af6f3e5 100644
--- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts
+++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts
@@ -368,7 +368,8 @@ describeWebGPU('keeping data on gpu ', () => {
 });
 
 function createReadonlyGPUBufferFromData(
-    device: GPUDevice, data: number[], dtype: tf.DataType) {
+    device: GPUDevice, data: number[], dtype: tf.DataType,
+    bufferUsage = GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE) {
   const bytesPerElement = 4;
   const sizeInBytes = data.length * bytesPerElement;
 
@@ -389,11 +390,8 @@ function createReadonlyGPUBufferFromData(
   }
   gpuWriteBuffer.unmap();
 
-  const gpuReadBuffer = device.createBuffer({
-    mappedAtCreation: false,
-    size: sizeInBytes,
-    usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
-  });
+  const gpuReadBuffer = device.createBuffer(
+      {mappedAtCreation: false, size: sizeInBytes, usage: bufferUsage});
 
   const copyEncoder = device.createCommandEncoder();
   copyEncoder.copyBufferToBuffer(
@@ -404,6 +402,30 @@ function createReadonlyGPUBufferFromData(
   return gpuReadBuffer;
 }
 
+function createStagingGPUBufferFromData(
+    device: GPUDevice, data: number[], dtype: tf.DataType) {
+  const bytesPerElement = 4;
+  const sizeInBytes = data.length * bytesPerElement;
+
+  const gpuWriteBuffer = device.createBuffer({
+    mappedAtCreation: true,
+    size: sizeInBytes,
+    usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC
+  });
+  const arrayBuffer = gpuWriteBuffer.getMappedRange();
+  if (dtype === 'float32') {
+    new Float32Array(arrayBuffer).set(data);
+  } else if (dtype === 'int32') {
+    new Int32Array(arrayBuffer).set(data);
+  } else {
+    throw new Error(
+        `Creating tensor from GPUBuffer only supports` +
+        `'float32'|'int32' dtype, while the dtype is ${dtype}.`);
+  }
+  gpuWriteBuffer.unmap();
+  return gpuWriteBuffer;
+}
+
 async function testCreateTensorFromGPUBuffer(
     dtype: tf.DataType, useDefaultShapeAndType = false) {
   const webGPUBackend = tf.backend() as WebGPUBackend;
@@ -443,6 +465,21 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     await testCreateTensorFromGPUBuffer('int32');
   });
 
+  it('work for read', async () => {
+    const webGPUBackend = tf.backend() as WebGPUBackend;
+    const device = webGPUBackend.device;
+    const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+    const dtype = 'float32';
+    const aBuffer = createReadonlyGPUBufferFromData(
+        device, aData, dtype,
+        GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE |
+            GPUBufferUsage.COPY_SRC);
+    const shape: number[] = [aData.length];
+    const a = tf.tensor({buffer: aBuffer}, shape, dtype);
+    await a.data();
+    aBuffer.destroy();
+  });
+
   it('two tensors share the same GPUBuffer', async () => {
     const webGPUBackend = tf.backend() as WebGPUBackend;
     const device = webGPUBackend.device;
@@ -493,7 +530,7 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     aBuffer.destroy();
   });
 
-  it('throw for GPUBuffer size is smaller than tensor size', async () => {
+  it('throw when GPUBuffer size is smaller than tensor size', async () => {
     const webGPUBackend = tf.backend() as WebGPUBackend;
     const device = webGPUBackend.device;
     const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
@@ -505,4 +542,18 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     expect(a).toThrowError();
     aBuffer.destroy();
   });
+
+  it('throw when GPUBuffer usage is not correct', async () => {
+    const webGPUBackend = tf.backend() as WebGPUBackend;
+    const device = webGPUBackend.device;
+    const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+    const dtype = 'float32';
+    // Create a GPUBuffer without GPUBufferUsage.STORAGE.
+    const aBuffer = createStagingGPUBufferFromData(device, aData, dtype);
+    // Throw when GPUBuffer usage is not correct.
+    const shape: number[] = [aData.length];
+    const a = () => tf.tensor({buffer: aBuffer}, shape, dtype);
+    expect(a).toThrowError();
+    aBuffer.destroy();
+  });
 });
diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts
index 80e7569265..e9dcd04366 100644
--- a/tfjs-core/src/ops/tensor.ts
+++ b/tfjs-core/src/ops/tensor.ts
@@ -170,8 +170,11 @@ import {makeTensor} from './tensor_ops_util';
  * tensor values. ). (If the values passed from texture is less than the tensor
  * size, zeros will be padded at the rear.). If the values is a `WebGPUData`
  * object, the dtype could only be 'float32' or 'int32 and the object has to
- * have: buffer, a `GPUBuffer`, the buffer must share the same `GPUDevice` with
- * TFJS's WebGPU backend.
+ * have: buffer, a `GPUBuffer`. The buffer must: 1. share the same `GPUDevice`
+ * with TFJS's WebGPU backend; 2.buffer.usage should at least support
+ * GPUBufferUsage.STORAGE, to support tensor.data, GPUBufferUsage.COPY_SRC is
+ * also required; 3. buffer.size should not be smaller than the byte size of
+ * tensor shape.
  * @param shape The shape of the tensor. Optional. If not provided,
  *   it is inferred from `values`.
  * @param dtype The data type.

From 54ed5e9038f53ff6a0d073c8d3e74a32e133f933 Mon Sep 17 00:00:00 2001
From: Xu Xing <xing.xu@intel.com>
Date: Mon, 21 Nov 2022 15:09:27 +0800
Subject: [PATCH 7/9] Add flag WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY

---
 tfjs-backend-webgpu/src/backend_webgpu.ts     | 39 +++++++++---
 .../src/backend_webgpu_test.ts                | 63 ++++++++++++++-----
 tfjs-backend-webgpu/src/flags_webgpu.ts       |  8 ++-
 tfjs-core/src/ops/tensor.ts                   | 14 ++---
 4 files changed, 91 insertions(+), 33 deletions(-)

diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts
index 7aa8ddfd3e..99c1cff681 100644
--- a/tfjs-backend-webgpu/src/backend_webgpu.ts
+++ b/tfjs-backend-webgpu/src/backend_webgpu.ts
@@ -51,8 +51,12 @@ type TensorData = {
   shape: number[],
   refCount: number,
   resourceInfo?: BufferInfo|TextureInfo,
-  // Indicate the tensor is created from an external GPU resource.
-  external?: boolean,
+  // zeroCopy is used for creating tensor from GPUBuffer. When zeroCopy is false
+  // or undefined (default), this GPUBuffer will be copied to the tensor's
+  // resource buffer. When zeroCopy is true, tensor will use this GPUBUffer as
+  // tensor's resource buffer, user should not destroy this GPUBuffer until all
+  // access are done.
+  zeroCopy?: boolean,
   // For complex numbers, the real and imaginary parts are stored as their own
   // individual tensors, with a parent joining the two with the
   // complexTensorInfos field.
@@ -244,8 +248,9 @@ export class WebGPUBackend extends KernelBackend {
     if (!tensorData || !tensorData.resourceInfo) {
       return;
     }
-    // If tensor data is from external resource, do not release.
-    if (tensorData.external) {
+    // If tensor's resource buffer is from a zero copy GPUBuffer, do not
+    // release.
+    if (tensorData.zeroCopy) {
       tensorData.resourceInfo = null;
       return;
     }
@@ -445,18 +450,31 @@ export class WebGPUBackend extends KernelBackend {
     return vals;
   }
 
+  // Copies the first `size` bytes of srcBuffer into a newly acquired buffer
+  // that is created with the given size and usage, and returns that buffer.
+  private copyBuffer(srcBuffer: GPUBuffer, size: number, usage: number) {
+    const dstBuffer = this.bufferManager.acquireBuffer(size, usage);
+    this.ensureCommandEncoderReady();
+    this.ensureComputePassEnded();
+    this.currentCommandEncoder.copyBufferToBuffer(
+        srcBuffer, 0, dstBuffer, 0, size);
+    this.submitQueue();
+    return dstBuffer;
+  }
+
   /**
    * Create a TF.js tensor out of an existing WebGPU buffer.
    */
   override createTensorFromGPUData(
       values: WebGPUData, shape: number[], dtype: DataType): Tensor {
-    const buffer = values.buffer;
+    let buffer = values.buffer;
     if (dtype === 'complex64') {
       throw new Error(`Cannot write to a complex64 dtype. `);
     }
     const dataId = {id: this.nextDataId()};
+    const zeroCopy = env().getBool('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY');
     this.tensorMap.set(
-        dataId, {dtype, shape, values: null, refCount: 1, external: true});
+        dataId, {dtype, shape, values: null, refCount: 1, zeroCopy});
     const tensorData = this.tensorMap.get(dataId);
     const size = webgpu_util.GPUBytesPerElement(tensorData.dtype) *
         util.sizeFromShape(tensorData.shape);
@@ -469,6 +487,10 @@ export class WebGPUBackend extends KernelBackend {
       throw new Error('GPUBuffer.usage should include GPUBufferUsage.STORAGE!');
     }
 
+    // Do buffer copy by default.
+    if (zeroCopy === false) {
+      buffer = this.copyBuffer(buffer, size, buffer.usage);
+    }
     tensorData.resourceInfo = {size: buffer.size, usage: buffer.usage, buffer};
     return engine().makeTensorFromDataId(dataId, shape, dtype, this);
   }
@@ -659,9 +681,8 @@ export class WebGPUBackend extends KernelBackend {
       // TODO: WebGPU doesn't support read data synchronously from GPU to CPU.
       // So it will report error when switching backend from WebGPU to others.
       // There are two situations: 1) swithcing the backend after running a
-      // model; 2) swithcing the backend within the model. Temporarilly keep the
-      // values on CPU to solve the first issue.
-      // tensorData.values = null;
+      // model; 2) switching the backend within the model. Temporarily keep
+      // values on CPU (skip `tensorData.values = null`) for the first issue.
     }
   }
 
diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts
index 0d1af6f3e5..2076b5d8a1 100644
--- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts
+++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts
@@ -369,7 +369,8 @@ describeWebGPU('keeping data on gpu ', () => {
 
 function createReadonlyGPUBufferFromData(
     device: GPUDevice, data: number[], dtype: tf.DataType,
-    bufferUsage = GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE) {
+    bufferUsage = GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE |
+        GPUBufferUsage.COPY_SRC) {
   const bytesPerElement = 4;
   const sizeInBytes = data.length * bytesPerElement;
 
@@ -427,7 +428,7 @@ function createStagingGPUBufferFromData(
 }
 
 async function testCreateTensorFromGPUBuffer(
-    dtype: tf.DataType, useDefaultShapeAndType = false) {
+    dtype: tf.DataType, useDefaultShapeAndType = false, zeroCopy = false) {
   const webGPUBackend = tf.backend() as WebGPUBackend;
   const device = webGPUBackend.device;
   const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
@@ -439,6 +440,9 @@ async function testCreateTensorFromGPUBuffer(
   const startNumTensors = tf.memory().numTensors;
   const a = useDefaultShapeAndType ? tf.tensor({buffer: aBuffer}) :
                                      tf.tensor({buffer: aBuffer}, shape, dtype);
+  if (zeroCopy !== true) {
+    aBuffer.destroy();
+  }
   const b = tf.tensor(bData, shape, dtype);
   const result = tf.add(a, b);
   tf.test_util.expectArraysClose(await result.data(), expected);
@@ -449,20 +453,22 @@ async function testCreateTensorFromGPUBuffer(
   const endNumTensors = tf.memory().numTensors;
   expect(endNumBytes - startNumBytes).toEqual(0);
   expect(endNumTensors - startNumTensors).toEqual(0);
-  aBuffer.destroy();
+  if (zeroCopy === true) {
+    aBuffer.destroy();
+  }
 }
 
-describeWebGPU('create tensor from GPUBuffer', () => {
+function createTensorFromGPUTest(zeroCopy = false) {
   it('use default shape and data type(float32)', async () => {
-    await testCreateTensorFromGPUBuffer('float32', true);
+    await testCreateTensorFromGPUBuffer('float32', true, zeroCopy);
   });
 
   it('work for float32', async () => {
-    await testCreateTensorFromGPUBuffer('float32');
+    await testCreateTensorFromGPUBuffer('float32', false, zeroCopy);
   });
 
   it('work for int32', async () => {
-    await testCreateTensorFromGPUBuffer('int32');
+    await testCreateTensorFromGPUBuffer('int32', false, zeroCopy);
   });
 
   it('work for read', async () => {
@@ -470,14 +476,16 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     const device = webGPUBackend.device;
     const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
     const dtype = 'float32';
-    const aBuffer = createReadonlyGPUBufferFromData(
-        device, aData, dtype,
-        GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE |
-            GPUBufferUsage.COPY_SRC);
+    const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
     const shape: number[] = [aData.length];
     const a = tf.tensor({buffer: aBuffer}, shape, dtype);
+    if (zeroCopy !== true) {
+      aBuffer.destroy();
+    }
     await a.data();
-    aBuffer.destroy();
+    if (zeroCopy === true) {
+      aBuffer.destroy();
+    }
   });
 
   it('two tensors share the same GPUBuffer', async () => {
@@ -491,6 +499,9 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     const shape: number[] = [aData.length];
     const a = tf.tensor({buffer: aBuffer}, shape, dtype);
     const b = tf.tensor({buffer: aBuffer}, shape, dtype);
+    if (zeroCopy !== true) {
+      aBuffer.destroy();
+    }
     const result = tf.add(a, b);
     const expected =
         [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32];
@@ -502,7 +513,9 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     const endNumTensors = tf.memory().numTensors;
     expect(endNumBytes - startNumBytes).toEqual(0);
     expect(endNumTensors - startNumTensors).toEqual(0);
-    aBuffer.destroy();
+    if (zeroCopy === true) {
+      aBuffer.destroy();
+    }
   });
 
   it('GPUBuffer size is bigger than tensor size', async () => {
@@ -517,6 +530,9 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     const shape: number[] = [aData.length - 1];
     const a = tf.tensor({buffer: aBuffer}, shape, dtype);
     const b = tf.tensor({buffer: aBuffer}, shape, dtype);
+    if (zeroCopy !== true) {
+      aBuffer.destroy();
+    }
     const result = tf.add(a, b);
     const expected = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30];
     tf.test_util.expectArraysClose(await result.data(), expected);
@@ -527,7 +543,9 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     const endNumTensors = tf.memory().numTensors;
     expect(endNumBytes - startNumBytes).toEqual(0);
     expect(endNumTensors - startNumTensors).toEqual(0);
-    aBuffer.destroy();
+    if (zeroCopy === true) {
+      aBuffer.destroy();
+    }
   });
 
   it('throw when GPUBuffer size is smaller than tensor size', async () => {
@@ -556,4 +574,21 @@ describeWebGPU('create tensor from GPUBuffer', () => {
     expect(a).toThrowError();
     aBuffer.destroy();
   });
+}
+
+describeWebGPU('create tensor from GPUBuffer', () => {
+  createTensorFromGPUTest();
+});
+
+describeWebGPU('create tensor from GPUBuffer with zero copy', () => {
+  let savedZeroCopyFlag = false;
+  beforeAll(() => {
+    savedZeroCopyFlag =
+        tf.env().get('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY') as boolean;
+    tf.env().set('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY', true);
+  });
+  afterAll(() => {
+    tf.env().set('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY', savedZeroCopyFlag);
+  });
+  createTensorFromGPUTest(true);
 });
diff --git a/tfjs-backend-webgpu/src/flags_webgpu.ts b/tfjs-backend-webgpu/src/flags_webgpu.ts
index f639b67e4d..31ecc1ebc9 100644
--- a/tfjs-backend-webgpu/src/flags_webgpu.ts
+++ b/tfjs-backend-webgpu/src/flags_webgpu.ts
@@ -76,10 +76,14 @@ ENV.registerFlag('WEBGPU_USE_NAIVE_CONV2D_DEBUG', () => false);
  * are dispatched, it means the hardware may be in low occupancy.
  * 0 means it's not set by the user. A default strategy will be applied.
  */
-ENV.registerFlag(
-    'WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL', () => 0);
+ENV.registerFlag('WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL', () => 0);
 
 /**
  * Whether we will run im2col as a separate shader for convolution.
  */
 ENV.registerFlag('WEBGPU_CONV_SEPARATE_IM2COL_SHADER', () => false);
+
+/**
+ * Whether use zero copy when create tensor from GPUBuffer.
+ */
+ENV.registerFlag('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY', () => false);
diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts
index e9dcd04366..6d4ba28871 100644
--- a/tfjs-core/src/ops/tensor.ts
+++ b/tfjs-core/src/ops/tensor.ts
@@ -99,9 +99,7 @@ import {makeTensor} from './tensor_ops_util';
  * // This makes it possible for TF.js applications to avoid GPU / CPU sync.
  * // For example, if your application includes a preprocessing step on the GPU,
  * // you could upload the GPU output directly to TF.js, rather than first
- * // downloading the values. Unlike WebGL, to support zero copy, this GPUBuffer
- * // is bound directly by the tensor. So donot destroy this GPUBuffer until all
- * // access are done.
+ * // downloading the values.
  *
  * // Example for WebGPU:
  * function createReadonlyGPUBufferFromData(device, data, dtype) {
@@ -128,7 +126,8 @@ import {makeTensor} from './tensor_ops_util';
  *   const gpuReadBuffer = device.createBuffer({
  *     mappedAtCreation: false,
  *     size: sizeInBytes,
- *     usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
+ *     usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE |
+ *         GPUBufferUsage.COPY_SRC
  *   });
  *
  *   const copyEncoder = device.createCommandEncoder();
@@ -171,10 +170,9 @@ import {makeTensor} from './tensor_ops_util';
  * size, zeros will be padded at the rear.). If the values is a `WebGPUData`
  * object, the dtype could only be 'float32' or 'int32 and the object has to
  * have: buffer, a `GPUBuffer`. The buffer must: 1. share the same `GPUDevice`
- * with TFJS's WebGPU backend; 2.buffer.usage should at least support
- * GPUBufferUsage.STORAGE, to support tensor.data, GPUBufferUsage.COPY_SRC is
- * also required; 3. buffer.size should not be smaller than the byte size of
- * tensor shape.
+ * with TFJS's WebGPU backend; 2. buffer.usage should at least support
+ * GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC; 3. buffer.size should not
+ * be smaller than the byte size of tensor shape.
  * @param shape The shape of the tensor. Optional. If not provided,
  *   it is inferred from `values`.
  * @param dtype The data type.

From 8a7a39c68ec26c36958d60fa945715583daa18da Mon Sep 17 00:00:00 2001
From: Xu Xing <xing.xu@intel.com>
Date: Tue, 22 Nov 2022 09:49:31 +0800
Subject: [PATCH 8/9] Support zeroCopy in WebGPUData

---
 tfjs-backend-webgpu/src/backend_webgpu.ts     | 28 ++++---
 .../src/backend_webgpu_test.ts                | 78 +++++++------------
 tfjs-backend-webgpu/src/flags_webgpu.ts       |  5 --
 tfjs-core/src/ops/tensor.ts                   | 18 ++++-
 tfjs-core/src/types.ts                        | 11 ++-
 5 files changed, 63 insertions(+), 77 deletions(-)

diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts
index 99c1cff681..fbe1c7ebf2 100644
--- a/tfjs-backend-webgpu/src/backend_webgpu.ts
+++ b/tfjs-backend-webgpu/src/backend_webgpu.ts
@@ -51,12 +51,9 @@ type TensorData = {
   shape: number[],
   refCount: number,
   resourceInfo?: BufferInfo|TextureInfo,
-  // zeroCopy is used for creating tensor from GPUBuffer. When zeroCopy is false
-  // or undefined (default), this GPUBuffer will be copied to the tensor's
-  // resource buffer. When zeroCopy is true, tensor will use this GPUBUffer as
-  // tensor's resource buffer, user should not destroy this GPUBuffer until all
-  // access are done.
-  zeroCopy?: boolean,
+  // When external is true, we use the resource provided by the user directly
+  // (without a copy), so the user is responsible for releasing it.
+  external?: boolean,
   // For complex numbers, the real and imaginary parts are stored as their own
   // individual tensors, with a parent joining the two with the
   // complexTensorInfos field.
@@ -248,9 +245,8 @@ export class WebGPUBackend extends KernelBackend {
     if (!tensorData || !tensorData.resourceInfo) {
       return;
     }
-    // If tensor's resource buffer is from a zero copy GPUBuffer, do not
-    // release.
-    if (tensorData.zeroCopy) {
+    // If tensor's resource is from external, do not release.
+    if (tensorData.external) {
       tensorData.resourceInfo = null;
       return;
     }
@@ -472,9 +468,9 @@ export class WebGPUBackend extends KernelBackend {
       throw new Error(`Cannot write to a complex64 dtype. `);
     }
     const dataId = {id: this.nextDataId()};
-    const zeroCopy = env().getBool('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY');
     this.tensorMap.set(
-        dataId, {dtype, shape, values: null, refCount: 1, zeroCopy});
+        dataId,
+        {dtype, shape, values: null, refCount: 1, external: values.zeroCopy});
     const tensorData = this.tensorMap.get(dataId);
     const size = webgpu_util.GPUBytesPerElement(tensorData.dtype) *
         util.sizeFromShape(tensorData.shape);
@@ -482,13 +478,15 @@ export class WebGPUBackend extends KernelBackend {
       throw new Error(`GPUBuffer size(${
           values.buffer.size}) is smaller than tensor size(${size})!`);
     } else if (
-        (values.buffer.usage & GPUBufferUsage.STORAGE) !==
-        GPUBufferUsage.STORAGE) {
-      throw new Error('GPUBuffer.usage should include GPUBufferUsage.STORAGE!');
+        (values.buffer.usage &
+         (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) !==
+        (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) {
+      throw new Error(
+          'GPUBuffer.usage should include GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC!');
     }
 
     // Do buffer copy by default.
-    if (zeroCopy === false) {
+    if (values.zeroCopy !== true) {
       buffer = this.copyBuffer(buffer, size, buffer.usage);
     }
     tensorData.resourceInfo = {size: buffer.size, usage: buffer.usage, buffer};
diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts
index 2076b5d8a1..d41b241ba3 100644
--- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts
+++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts
@@ -367,10 +367,8 @@ describeWebGPU('keeping data on gpu ', () => {
   });
 });
 
-function createReadonlyGPUBufferFromData(
-    device: GPUDevice, data: number[], dtype: tf.DataType,
-    bufferUsage = GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE |
-        GPUBufferUsage.COPY_SRC) {
+function createStagingGPUBufferFromData(
+    device: GPUDevice, data: number[], dtype: tf.DataType) {
   const bytesPerElement = 4;
   const sizeInBytes = data.length * bytesPerElement;
 
@@ -390,7 +388,17 @@ function createReadonlyGPUBufferFromData(
         `'float32'|'int32' dtype, while the dtype is ${dtype}.`);
   }
   gpuWriteBuffer.unmap();
+  return gpuWriteBuffer;
+}
 
+function createGPUBufferFromData(
+    device: GPUDevice, data: number[], dtype: tf.DataType,
+    bufferUsage = GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE |
+        GPUBufferUsage.COPY_SRC) {
+  const bytesPerElement = 4;
+  const sizeInBytes = data.length * bytesPerElement;
+
+  const gpuWriteBuffer = createStagingGPUBufferFromData(device, data, dtype);
   const gpuReadBuffer = device.createBuffer(
       {mappedAtCreation: false, size: sizeInBytes, usage: bufferUsage});
 
@@ -403,30 +411,6 @@ function createReadonlyGPUBufferFromData(
   return gpuReadBuffer;
 }
 
-function createStagingGPUBufferFromData(
-    device: GPUDevice, data: number[], dtype: tf.DataType) {
-  const bytesPerElement = 4;
-  const sizeInBytes = data.length * bytesPerElement;
-
-  const gpuWriteBuffer = device.createBuffer({
-    mappedAtCreation: true,
-    size: sizeInBytes,
-    usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC
-  });
-  const arrayBuffer = gpuWriteBuffer.getMappedRange();
-  if (dtype === 'float32') {
-    new Float32Array(arrayBuffer).set(data);
-  } else if (dtype === 'int32') {
-    new Int32Array(arrayBuffer).set(data);
-  } else {
-    throw new Error(
-        `Creating tensor from GPUBuffer only supports` +
-        `'float32'|'int32' dtype, while the dtype is ${dtype}.`);
-  }
-  gpuWriteBuffer.unmap();
-  return gpuWriteBuffer;
-}
-
 async function testCreateTensorFromGPUBuffer(
     dtype: tf.DataType, useDefaultShapeAndType = false, zeroCopy = false) {
   const webGPUBackend = tf.backend() as WebGPUBackend;
@@ -434,12 +418,13 @@ async function testCreateTensorFromGPUBuffer(
   const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
   const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4];
   const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20];
-  const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
+  const aBuffer = createGPUBufferFromData(device, aData, dtype);
   const shape: number[] = [aData.length];
   const startNumBytes = tf.memory().numBytes;
   const startNumTensors = tf.memory().numTensors;
-  const a = useDefaultShapeAndType ? tf.tensor({buffer: aBuffer}) :
-                                     tf.tensor({buffer: aBuffer}, shape, dtype);
+  const webGPUData = {buffer: aBuffer, zeroCopy};
+  const a = useDefaultShapeAndType ? tf.tensor(webGPUData) :
+                                     tf.tensor(webGPUData, shape, dtype);
   if (zeroCopy !== true) {
     aBuffer.destroy();
   }
@@ -476,9 +461,9 @@ function createTensorFromGPUTest(zeroCopy = false) {
     const device = webGPUBackend.device;
     const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
     const dtype = 'float32';
-    const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
+    const aBuffer = createGPUBufferFromData(device, aData, dtype);
     const shape: number[] = [aData.length];
-    const a = tf.tensor({buffer: aBuffer}, shape, dtype);
+    const a = tf.tensor({buffer: aBuffer, zeroCopy}, shape, dtype);
     if (zeroCopy !== true) {
       aBuffer.destroy();
     }
@@ -493,12 +478,13 @@ function createTensorFromGPUTest(zeroCopy = false) {
     const device = webGPUBackend.device;
     const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
     const dtype = 'float32';
-    const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
+    const aBuffer = createGPUBufferFromData(device, aData, dtype);
     const startNumBytes = tf.memory().numBytes;
     const startNumTensors = tf.memory().numTensors;
     const shape: number[] = [aData.length];
-    const a = tf.tensor({buffer: aBuffer}, shape, dtype);
-    const b = tf.tensor({buffer: aBuffer}, shape, dtype);
+    const webGPUData = {buffer: aBuffer, zeroCopy};
+    const a = tf.tensor(webGPUData, shape, dtype);
+    const b = tf.tensor(webGPUData, shape, dtype);
     if (zeroCopy !== true) {
       aBuffer.destroy();
     }
@@ -523,13 +509,14 @@ function createTensorFromGPUTest(zeroCopy = false) {
     const device = webGPUBackend.device;
     const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
     const dtype = 'float32';
-    const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
+    const aBuffer = createGPUBufferFromData(device, aData, dtype);
     const startNumBytes = tf.memory().numBytes;
     const startNumTensors = tf.memory().numTensors;
     // GPUBuffer.size is bigger than shape size
     const shape: number[] = [aData.length - 1];
-    const a = tf.tensor({buffer: aBuffer}, shape, dtype);
-    const b = tf.tensor({buffer: aBuffer}, shape, dtype);
+    const webGPUData = {buffer: aBuffer, zeroCopy};
+    const a = tf.tensor(webGPUData, shape, dtype);
+    const b = tf.tensor(webGPUData, shape, dtype);
     if (zeroCopy !== true) {
       aBuffer.destroy();
     }
@@ -553,7 +540,7 @@ function createTensorFromGPUTest(zeroCopy = false) {
     const device = webGPUBackend.device;
     const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
     const dtype = 'float32';
-    const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
+    const aBuffer = createGPUBufferFromData(device, aData, dtype);
     // Throw when GPUBuffer.size is smaller than shape size
     const shape: number[] = [aData.length + 1];
     const a = () => tf.tensor({buffer: aBuffer}, shape, dtype);
@@ -570,7 +557,7 @@ function createTensorFromGPUTest(zeroCopy = false) {
     const aBuffer = createStagingGPUBufferFromData(device, aData, dtype);
     // Throw when GPUBuffer usage is not correct.
     const shape: number[] = [aData.length];
-    const a = () => tf.tensor({buffer: aBuffer}, shape, dtype);
+    const a = () => tf.tensor({buffer: aBuffer, zeroCopy}, shape, dtype);
     expect(a).toThrowError();
     aBuffer.destroy();
   });
@@ -581,14 +568,5 @@ describeWebGPU('create tensor from GPUBuffer', () => {
 });
 
 describeWebGPU('create tensor from GPUBuffer with zero copy', () => {
-  let savedZeroCopyFlag = false;
-  beforeAll(() => {
-    savedZeroCopyFlag =
-        tf.env().get('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY') as boolean;
-    tf.env().set('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY', true);
-  });
-  afterAll(() => {
-    tf.env().set('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY', savedZeroCopyFlag);
-  });
   createTensorFromGPUTest(true);
 });
diff --git a/tfjs-backend-webgpu/src/flags_webgpu.ts b/tfjs-backend-webgpu/src/flags_webgpu.ts
index 31ecc1ebc9..49ab70ffba 100644
--- a/tfjs-backend-webgpu/src/flags_webgpu.ts
+++ b/tfjs-backend-webgpu/src/flags_webgpu.ts
@@ -82,8 +82,3 @@ ENV.registerFlag('WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL', () => 0);
  * Whether we will run im2col as a separate shader for convolution.
  */
 ENV.registerFlag('WEBGPU_CONV_SEPARATE_IM2COL_SHADER', () => false);
-
-/**
- * Whether use zero copy when create tensor from GPUBuffer.
- */
-ENV.registerFlag('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY', () => false);
diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts
index 6d4ba28871..72ff5bfc11 100644
--- a/tfjs-core/src/ops/tensor.ts
+++ b/tfjs-core/src/ops/tensor.ts
@@ -99,10 +99,14 @@ import {makeTensor} from './tensor_ops_util';
  * // This makes it possible for TF.js applications to avoid GPU / CPU sync.
  * // For example, if your application includes a preprocessing step on the GPU,
  * // you could upload the GPU output directly to TF.js, rather than first
- * // downloading the values.
+ * // downloading the values. Unlike WebGL, this optionally supports zero copy
+ * // by WebGPUData.zeroCopy. When zeroCopy is false or undefined(default), this
+ * // passing GPUBuffer can be destroyed after tensor is created. When zeroCopy
+ * // is true, this GPUBuffer is bound directly by the tensor, so donot destroy
+ * // this GPUBuffer until all access is done.
  *
  * // Example for WebGPU:
- * function createReadonlyGPUBufferFromData(device, data, dtype) {
+ * function createGPUBufferFromData(device, data, dtype) {
  *   const bytesPerElement = 4;
  *   const sizeInBytes = data.length * bytesPerElement;
  *
@@ -144,8 +148,10 @@ import {makeTensor} from './tensor_ops_util';
  * const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
  * const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4];
  * const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20];
- * const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
+ * const aBuffer = createGPUBufferFromData(device, aData, dtype);
  * const shape = [aData.length];
+ * // To use zeroCopy, use {buffer: aBuffer, zeroCopy: true} instead and do not
+ * // destroy aBuffer until all access is done.
  * const a = tf.tensor({buffer: aBuffer}, shape, dtype);
  * const b = tf.tensor(bData, shape, dtype);
  * const result = tf.add(a, b);
@@ -172,7 +178,11 @@ import {makeTensor} from './tensor_ops_util';
  * have: buffer, a `GPUBuffer`. The buffer must: 1. share the same `GPUDevice`
  * with TFJS's WebGPU backend; 2. buffer.usage should at least support
  * GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC; 3. buffer.size should not
- * be smaller than the byte size of tensor shape.
+ * be smaller than the byte size of tensor shape. WebGPUData optionally supports
+ * zero copy by flag zeroCopy. When zeroCopy is false or undefined(default),
+ * this passing GPUBuffer can be destroyed after tensor is created. When
+ * zeroCopy is true, this GPUBuffer is bound directly by the tensor, so donot
+ * destroy this GPUBuffer until all access is done.
  * @param shape The shape of the tensor. Optional. If not provided,
  *   it is inferred from `values`.
  * @param dtype The data type.
diff --git a/tfjs-core/src/types.ts b/tfjs-core/src/types.ts
index 5c38df4c38..2d3fe88dda 100644
--- a/tfjs-core/src/types.ts
+++ b/tfjs-core/src/types.ts
@@ -184,10 +184,15 @@ export interface WebGLData {
 }
 
 /**
- * Type for representing a buffer data to create a tensor. Use default usage
- * GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC. If not specified at
- * creating a tensor, tensor type is float32.
+ * Type for representing buffer data used to create a tensor. Buffer usage
+ * should at least support GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC.
+ * When zeroCopy is false or undefined (default), this GPUBuffer will be copied
+ * to the tensor's resource buffer. When zeroCopy is true, the tensor will use
+ * this GPUBuffer as its resource buffer, so the user should not destroy this
+ * GPUBuffer until all access is done. If dtype is not specified when creating
+ * a tensor, the tensor type is float32.
  */
 export interface WebGPUData {
   buffer: GPUBuffer;
+  zeroCopy?: boolean;
 }

From fd85685a09dfb326071548e1edc3cb3df2cd6ad4 Mon Sep 17 00:00:00 2001
From: Xu Xing <xing.xu@intel.com>
Date: Wed, 23 Nov 2022 07:37:44 +0800
Subject: [PATCH 9/9] Fix donot

---
 tfjs-core/src/ops/tensor.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts
index 72ff5bfc11..1a6be14cba 100644
--- a/tfjs-core/src/ops/tensor.ts
+++ b/tfjs-core/src/ops/tensor.ts
@@ -102,7 +102,7 @@ import {makeTensor} from './tensor_ops_util';
  * // downloading the values. Unlike WebGL, this optionally supports zero copy
  * // by WebGPUData.zeroCopy. When zeroCopy is false or undefined(default), this
  * // passing GPUBuffer can be destroyed after tensor is created. When zeroCopy
- * // is true, this GPUBuffer is bound directly by the tensor, so donot destroy
+ * // is true, this GPUBuffer is bound directly by the tensor, so do not destroy
  * // this GPUBuffer until all access is done.
  *
  * // Example for WebGPU:
@@ -181,7 +181,7 @@ import {makeTensor} from './tensor_ops_util';
  * be smaller than the byte size of tensor shape. WebGPUData optionally supports
  * zero copy by flag zeroCopy. When zeroCopy is false or undefined(default),
  * this passing GPUBuffer can be destroyed after tensor is created. When
- * zeroCopy is true, this GPUBuffer is bound directly by the tensor, so donot
+ * zeroCopy is true, this GPUBuffer is bound directly by the tensor, so do not
  * destroy this GPUBuffer until all access is done.
  * @param shape The shape of the tensor. Optional. If not provided,
  *   it is inferred from `values`.