From 521a3fdee9b0cf892e1c227cee554927c48c4740 Mon Sep 17 00:00:00 2001 From: Xu Xing Date: Wed, 9 Nov 2022 14:15:48 +0800 Subject: [PATCH 1/9] [webgpu] create tensor from GPUBuffer BUG: https://github.com/tensorflow/tfjs/issues/6232 --- tfjs-backend-webgl/src/backend_webgl.ts | 5 +- tfjs-backend-webgpu/src/backend_webgpu.ts | 38 +++++- .../src/backend_webgpu_test.ts | 118 ++++++++++++++++++ tfjs-core/src/backends/backend.ts | 10 +- tfjs-core/src/base.ts | 2 +- tfjs-core/src/ops/tensor.ts | 65 +++++++++- tfjs-core/src/ops/tensor_ops_util.ts | 22 ++-- tfjs-core/src/tensor_util_env.ts | 18 ++- tfjs-core/src/types.ts | 10 ++ tfjs-core/src/util_base.ts | 4 +- 10 files changed, 264 insertions(+), 28 deletions(-) diff --git a/tfjs-backend-webgl/src/backend_webgl.ts b/tfjs-backend-webgl/src/backend_webgl.ts index b01b295e0f..3048694276 100644 --- a/tfjs-backend-webgl/src/backend_webgl.ts +++ b/tfjs-backend-webgl/src/backend_webgl.ts @@ -1304,8 +1304,9 @@ export class MathBackendWebGL extends KernelBackend { * Create a TF.js tensor out of an existing WebGL texture. A new texture will * be created. 
*/ - override createTensorFromTexture(values: WebGLData, shape: number[], - dtype: DataType): Tensor { + override createTensorFromGPUData( + values: WebGLData, shape: number[], dtype: DataType): Tensor { + values.channels = values.channels || 'RGBA'; const {texture, height, width, channels} = values; const backend = engine().backend as MathBackendWebGL; diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts index 1b296a6b8d..44967371d7 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu.ts @@ -17,7 +17,7 @@ import './flags_webgpu'; -import {backend_util, buffer, DataStorage, DataType, engine, env, GPUData, KernelBackend, Rank, RecursiveArray, ShapeMap, TensorBuffer, TensorInfo, TimingInfo, TypedArray, util} from '@tensorflow/tfjs-core'; +import {backend_util, buffer, DataStorage, DataType, engine, env, GPUData, KernelBackend, Rank, RecursiveArray, ShapeMap, Tensor, TensorBuffer, TensorInfo, TimingInfo, TypedArray, util, WebGPUData} from '@tensorflow/tfjs-core'; import {AdapterInfo} from './adapter_info'; import {BufferManager} from './buffer_manager'; @@ -51,6 +51,8 @@ type TensorData = { shape: number[], refCount: number, resourceInfo?: BufferInfo|TextureInfo, + // Indicate the tensor is created from an external GPU resource. + external?: boolean, // For complex numbers, the real and imaginary parts are stored as their own // individual tensors, with a parent joining the two with the // complexTensorInfos field. @@ -242,6 +244,11 @@ export class WebGPUBackend extends KernelBackend { if (!tensorData || !tensorData.resourceInfo) { return; } + // If tensor data is from external resource, do not release. 
+ if (tensorData.external) { + tensorData.resourceInfo = null; + return; + } if ('texture' in tensorData.resourceInfo) { const textureInfo = tensorData.resourceInfo; if (textureInfo.texture instanceof GPUTexture) { @@ -282,7 +289,8 @@ export class WebGPUBackend extends KernelBackend { } } - override write(values: backend_util.BackendValues, shape: number[], + override write( + values: backend_util.BackendValues, shape: number[], dtype: DataType): DataId { if (dtype === 'complex64' && values != null) { throw new Error( @@ -437,6 +445,32 @@ export class WebGPUBackend extends KernelBackend { return vals; } + /** + * Create a TF.js tensor out of an existing WebGPU buffer. + */ + override createTensorFromGPUData( + values: WebGPUData, shape: number[], dtype: DataType): Tensor { + const buffer = values.buffer; + if (dtype === 'complex64') { + throw new Error(`Cannot write to a complex64 dtype. `); + } + const dataId = {id: this.nextDataId()}; + this.tensorMap.set( + dataId, {dtype, shape, values: null, refCount: 1, external: true}); + const tensorData = this.tensorMap.get(dataId); + const sizeFromShape = util.sizeFromShape(tensorData.shape); + const size = + webgpu_util.GPUBytesPerElement(tensorData.dtype) * sizeFromShape; + if (values.size < sizeFromShape) { + throw new Error(`GPUBuffer size(${ + values.size}) is smaller than tensor size(${sizeFromShape})!`); + } + + tensorData + .resourceInfo = {size, usage: this.defaultGpuBufferUsage(), buffer}; + return engine().makeTensorFromDataId(dataId, shape, dtype, this); + } + /** * Read tensor to a new GPUBuffer. * @param dataId The source tensor. 
diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts index c34c9e69d7..04cee1ec29 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts @@ -366,3 +366,121 @@ describeWebGPU('keeping data on gpu ', () => { expect(endDataBuckets).toEqual(startDataBuckets + 1); }); }); + +async function createReadonlyGPUBufferFromData( + device: GPUDevice, data: number[], dtype: tf.DataType) { + const bytesPerElement = 4; + const sizeInBytes = data.length * bytesPerElement; + + const gpuWriteBuffer = device.createBuffer({ + mappedAtCreation: true, + size: sizeInBytes, + usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC + }); + const arrayBuffer = gpuWriteBuffer.getMappedRange(); + if (dtype === 'float32') { + new Float32Array(arrayBuffer).set(data); + } else if (dtype === 'int32') { + new Int32Array(arrayBuffer).set(data); + } else { + throw new Error( + `Creating tensor from GPUBuffer only supports` + + `'float32'|'int32' dtype, while the dtype is ${dtype}.`); + } + gpuWriteBuffer.unmap(); + + const gpuReadBuffer = device.createBuffer({ + mappedAtCreation: false, + size: sizeInBytes, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + }); + + const copyEncoder = device.createCommandEncoder(); + copyEncoder.copyBufferToBuffer( + gpuWriteBuffer, 0, gpuReadBuffer, 0, sizeInBytes); + const copyCommands = copyEncoder.finish(); + device.queue.submit([copyCommands]); + gpuWriteBuffer.destroy(); + return gpuReadBuffer; +} + +async function testCreateTensorFromGPUBuffer( + dtype: tf.DataType, useDefaultShapeAndType = false) { + const webGPUBackend = tf.backend() as WebGPUBackend; + const device = webGPUBackend.device; + const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]; + const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20]; + const aBuffer = await 
createReadonlyGPUBufferFromData(device, aData, dtype); + const shape: number[] = [aData.length]; + const startNumBytes = tf.memory().numBytes; + const startNumTensors = tf.memory().numTensors; + const a = useDefaultShapeAndType ? + tf.tensor({buffer: aBuffer, size: aData.length}) : + tf.tensor({buffer: aBuffer, size: aData.length}, shape, dtype); + const b = tf.tensor(bData, shape, dtype); + const result = tf.add(a, b); + tf.test_util.expectArraysClose(await result.data(), expected); + a.dispose(); + b.dispose(); + result.dispose(); + const endNumBytes = tf.memory().numBytes; + const endNumTensors = tf.memory().numTensors; + expect(endNumBytes - startNumBytes).toEqual(0); + expect(endNumTensors - startNumTensors).toEqual(0); + aBuffer.destroy(); +} + +describeWebGPU('create tensor from GPUBuffer', () => { + it('use default shape and data type(float32)', async () => { + await testCreateTensorFromGPUBuffer('float32', true); + }); + + it('work for float32', async () => { + await testCreateTensorFromGPUBuffer('float32'); + }); + + it('work for int32', async () => { + await testCreateTensorFromGPUBuffer('int32'); + }); + + it('throw when size is not set or incorrect', async () => { + const webGPUBackend = tf.backend() as WebGPUBackend; + const device = webGPUBackend.device; + const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + const dtype = 'float32'; + const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype); + const shape: number[] = [aData.length]; + const a = () => tf.tensor({buffer: aBuffer}, shape, dtype); + expect(a).toThrowError(); + const b = () => tf.tensor({buffer: aBuffer, size: 0}, shape, dtype); + expect(b).toThrowError(); + aBuffer.destroy(); + }); + + it('two tensors share the same GPUBuffer', async () => { + const webGPUBackend = tf.backend() as WebGPUBackend; + const device = webGPUBackend.device; + const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + const dtype = 'float32'; + const aBuffer = 
await createReadonlyGPUBufferFromData(device, aData, dtype); + const startNumBytes = tf.memory().numBytes; + const startNumTensors = tf.memory().numTensors; + const shape: number[] = [aData.length]; + const size = aData.length * 4; + const a = tf.tensor({buffer: aBuffer, size}, shape, dtype); + const b = tf.tensor({buffer: aBuffer, size}, shape, dtype); + const result = tf.add(a, b); + const expected = + [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]; + tf.test_util.expectArraysClose(await result.data(), expected); + a.dispose(); + b.dispose(); + result.dispose(); + const endNumBytes = tf.memory().numBytes; + const endNumTensors = tf.memory().numTensors; + expect(endNumBytes - startNumBytes).toEqual(0); + expect(endNumTensors - startNumTensors).toEqual(0); + aBuffer.destroy(); + }); +}); diff --git a/tfjs-core/src/backends/backend.ts b/tfjs-core/src/backends/backend.ts index 33626bd31d..6f23ec0a3e 100644 --- a/tfjs-core/src/backends/backend.ts +++ b/tfjs-core/src/backends/backend.ts @@ -17,7 +17,7 @@ import {Backend, DataToGPUOptions, GPUData, Tensor} from '../tensor'; import {DataId} from '../tensor_info'; -import {BackendValues, DataType, WebGLData} from '../types'; +import {BackendValues, DataType, WebGLData, WebGPUData} from '../types'; export const EPSILON_FLOAT32 = 1e-7; export const EPSILON_FLOAT16 = 1e-4; @@ -133,10 +133,12 @@ export class KernelBackend implements TensorStorage, Backend, BackendTimer { refCount: number): void { return notYetImplemented('move'); } - createTensorFromTexture(values: WebGLData, shape: number[], dtype: DataType): - Tensor { - return notYetImplemented('createTensorFromTexture'); + + createTensorFromGPUData( + values: WebGLData|WebGPUData, shape: number[], dtype: DataType): Tensor { + return notYetImplemented('createTensorFromGPUData'); } + memory(): {unreliable: boolean; reasons?: string[]} { return notYetImplemented('memory'); } diff --git a/tfjs-core/src/base.ts b/tfjs-core/src/base.ts index c8a5ef7419..e869ede5d2 
100644 --- a/tfjs-core/src/base.ts +++ b/tfjs-core/src/base.ts @@ -55,7 +55,7 @@ export {RMSPropOptimizer} from './optimizers/rmsprop_optimizer'; export {SGDOptimizer} from './optimizers/sgd_optimizer'; export {DataToGPUOptions, DataToGPUWebGLOption, GPUData, Scalar, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D, TensorBuffer, Variable} from './tensor'; export {GradSaveFunc, NamedTensorMap, TensorContainer, TensorContainerArray, TensorContainerObject} from './tensor_types'; -export {BackendValues, DataType, DataTypeMap, DataValues, NumericDataType, PixelData, Rank, RecursiveArray, ScalarLike, ShapeMap, sumOutType, TensorLike, TypedArray, upcastType, WebGLData} from './types'; +export {BackendValues, DataType, DataTypeMap, DataValues, NumericDataType, PixelData, Rank, RecursiveArray, ScalarLike, ShapeMap, sumOutType, TensorLike, TypedArray, upcastType, WebGLData, WebGPUData} from './types'; export * from './ops/ops'; export {Reduction} from './ops/loss_ops_utils'; diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts index cf9933251c..507454ee41 100644 --- a/tfjs-core/src/ops/tensor.ts +++ b/tfjs-core/src/ops/tensor.ts @@ -18,7 +18,7 @@ import {Tensor} from '../tensor'; import {inferShape} from '../tensor_util_env'; import {TensorLike} from '../types'; -import {DataType, Rank, ShapeMap, WebGLData} from '../types'; +import {DataType, Rank, ShapeMap, WebGLData, WebGPUData} from '../types'; import {makeTensor} from './tensor_ops_util'; @@ -92,6 +92,67 @@ import {makeTensor} from './tensor_ops_util'; * * const tex = a.dataToGPU(); * ``` + * + * ```js + * // Pass a `WebGPUData` object and specify a shape yourself. + * + * // This makes it possible for TF.js applications to avoid GPU / CPU sync. + * // For example, if your application includes a preprocessing step on the GPU, + * // you could upload the GPU output directly to TF.js, rather than first + * // downloading the values. 
+ * + * // Example for WebGPU: + * async function createReadonlyGPUBufferFromData(device, data, dtype) { + * const bytesPerElement = 4; + * const sizeInBytes = data.length * bytesPerElement; + * + * const gpuWriteBuffer = device.createBuffer({ + * mappedAtCreation: true, + * size: sizeInBytes, + * usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC + * }); + * const arrayBuffer = gpuWriteBuffer.getMappedRange(); + * if (dtype === 'float32') { + * new Float32Array(arrayBuffer).set(data); + * } else if (dtype === 'int32') { + * new Int32Array(arrayBuffer).set(data); + * } else { + * throw new Error( + * `Creating tensor from GPUBuffer only supports` + + * `'float32'|'int32' dtype, while the dtype is ${dtype}.`); + * } + * gpuWriteBuffer.unmap(); + * + * const gpuReadBuffer = device.createBuffer({ + * mappedAtCreation: false, + * size: sizeInBytes, + * usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + * }); + * + * const copyEncoder = device.createCommandEncoder(); + * copyEncoder.copyBufferToBuffer( + * gpuWriteBuffer, 0, gpuReadBuffer, 0, sizeInBytes); + * const copyCommands = copyEncoder.finish(); + * device.queue.submit([copyCommands]); + * gpuWriteBuffer.destroy(); + * return gpuReadBuffer; + * } + * + * const dtype = 'float32'; + * const device = tf.backend().device; + * const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + * const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]; + * const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20]; + * const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype); + * const shape = [aData.length]; + * const a = tf.tensor({buffer: aBuffer, size: aData.length}); + * const b = tf.tensor(bData, shape, dtype); + * const result = tf.add(a, b); + * a.dispose(); + * b.dispose(); + * result.dispose(); + * aBuffer.destroy(); + * ``` * @param values The values of the tensor. 
Can be nested array of numbers, * or a flat array, or a `TypedArray`, or a `WebGLData` object. If the * values are strings, they will be encoded as utf-8 and kept as `Uint8Array[]`. @@ -113,7 +174,7 @@ import {makeTensor} from './tensor_ops_util'; * @doc {heading: 'Tensors', subheading: 'Creation'} */ export function tensor( - values: TensorLike|WebGLData, shape?: ShapeMap[R], + values: TensorLike|WebGLData|WebGPUData, shape?: ShapeMap[R], dtype?: DataType): Tensor { const inferredShape = inferShape(values, dtype); return makeTensor(values, shape, inferredShape, dtype) as Tensor; diff --git a/tfjs-core/src/ops/tensor_ops_util.ts b/tfjs-core/src/ops/tensor_ops_util.ts index 1b497d4ceb..d72a11317e 100644 --- a/tfjs-core/src/ops/tensor_ops_util.ts +++ b/tfjs-core/src/ops/tensor_ops_util.ts @@ -17,32 +17,34 @@ import {ENGINE} from '../engine'; import {Tensor} from '../tensor'; -import {TensorLike, TypedArray, WebGLData} from '../types'; +import {TensorLike, TypedArray, WebGLData, WebGPUData} from '../types'; import {DataType} from '../types'; import {assert, assertNonNegativeIntegerDimensions, flatten, inferDtype, isTypedArray, sizeFromShape, toTypedArray} from '../util'; /** This is shared code across all tensor creation methods. */ export function makeTensor( - values: TensorLike|WebGLData, shape: number[], inferredShape: number[], - dtype?: DataType): Tensor { + values: TensorLike|WebGLData|WebGPUData, shape: number[], + inferredShape: number[], dtype?: DataType): Tensor { if (dtype == null) { dtype = inferDtype(values); - } - if (dtype === 'complex64') { + } else if (dtype === 'complex64') { throw new Error( `Cannot construct a complex64 tensor directly. 
` + `Please use tf.complex(real, imag).`); } - if (typeof values === 'object' && 'texture' in values) { + + if (typeof values === 'object' && + (('texture' in values && values.texture instanceof WebGLTexture) || + ('buffer' in values && values.buffer instanceof GPUBuffer))) { if (dtype !== 'float32' && dtype !== 'int32') { throw new Error( - `Creating tensor from texture only supports ` + + `Creating tensor from GPU data only supports ` + `'float32'|'int32' dtype, while the dtype is ${dtype}.`); } - values.channels = values.channels || 'RGBA'; - return ENGINE.backend.createTensorFromTexture( - values, shape || inferredShape, dtype); + return ENGINE.backend.createTensorFromGPUData( + values as WebGLData | WebGPUData, shape || inferredShape, dtype); } + if (!isTypedArray(values) && !Array.isArray(values) && typeof values !== 'number' && typeof values !== 'boolean' && typeof values !== 'string') { diff --git a/tfjs-core/src/tensor_util_env.ts b/tfjs-core/src/tensor_util_env.ts index 139257d491..c7705697f6 100644 --- a/tfjs-core/src/tensor_util_env.ts +++ b/tfjs-core/src/tensor_util_env.ts @@ -18,19 +18,27 @@ import {ENGINE} from './engine'; import {env} from './environment'; import {Tensor} from './tensor'; -import {DataType, TensorLike, WebGLData} from './types'; +import {DataType, TensorLike, WebGLData, WebGPUData} from './types'; import {assert, flatten, inferDtype, isTypedArray, toTypedArray} from './util'; export function inferShape( - val: TensorLike|WebGLData, dtype?: DataType): number[] { + val: TensorLike|WebGLData|WebGPUData, dtype?: DataType): number[] { let firstElem: typeof val = val; if (isTypedArray(val)) { return dtype === 'string' ? 
[] : [val.length]; } - if (typeof val === 'object' && 'texture' in val) { - const usedChannels = val.channels || 'RGBA'; - return [val.height, val.width * usedChannels.length]; + const isObject = typeof val === 'object'; + if (isObject) { + if ('texture' in val && val.texture instanceof WebGLTexture) { + const usedChannels = val.channels || 'RGBA'; + return [val.height, val.width * usedChannels.length]; + } else if ('buffer' in val && val.buffer instanceof GPUBuffer) { + if (val.size == null) { + throw new Error('size should be defined in WebGPUData!'); + } + return [val.size]; + } } if (!Array.isArray(val)) { return []; // Scalar. diff --git a/tfjs-core/src/types.ts b/tfjs-core/src/types.ts index 7e416e3c81..5bcb4231f6 100644 --- a/tfjs-core/src/types.ts +++ b/tfjs-core/src/types.ts @@ -182,3 +182,13 @@ export interface WebGLData { width: number; channels: WebGLChannels; } + +/** + * Type for representing buffer data used to create a tensor. Use default usage + * GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC. If not specified when + * creating a tensor, tensor type is float32. + */ +export interface WebGPUData { + buffer: GPUBuffer; + size: number; +} diff --git a/tfjs-core/src/util_base.ts b/tfjs-core/src/util_base.ts index f4a6f32d22..132cc713d3 100644 --- a/tfjs-core/src/util_base.ts +++ b/tfjs-core/src/util_base.ts @@ -15,7 +15,7 @@ * ============================================================================= */ -import {DataType, DataTypeMap, FlatVector, NumericDataType, RecursiveArray, TensorLike, TypedArray, WebGLData} from './types'; +import {DataType, DataTypeMap, FlatVector, NumericDataType, RecursiveArray, TensorLike, TypedArray, WebGLData, WebGPUData} from './types'; /** * Shuffles the array in-place using Fisher-Yates algorithm. 
@@ -559,7 +559,7 @@ export function isNumber(value: {}): boolean { return typeof value === 'number'; } -export function inferDtype(values: TensorLike|WebGLData): DataType { +export function inferDtype(values: TensorLike|WebGLData|WebGPUData): DataType { if (Array.isArray(values)) { return inferDtype(values[0]); } From 2245c963db55444a5bfec26725b402cd4175dcac Mon Sep 17 00:00:00 2001 From: Xu Xing Date: Fri, 11 Nov 2022 13:17:53 +0800 Subject: [PATCH 2/9] Fix CPU fail --- tfjs-core/src/ops/tensor_ops_util.ts | 4 ++-- tfjs-core/src/tensor_util_env.ts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tfjs-core/src/ops/tensor_ops_util.ts b/tfjs-core/src/ops/tensor_ops_util.ts index d72a11317e..197ccf1e30 100644 --- a/tfjs-core/src/ops/tensor_ops_util.ts +++ b/tfjs-core/src/ops/tensor_ops_util.ts @@ -34,8 +34,8 @@ export function makeTensor( } if (typeof values === 'object' && - (('texture' in values && values.texture instanceof WebGLTexture) || - ('buffer' in values && values.buffer instanceof GPUBuffer))) { + ('texture' in values || + ('buffer' in values && !(values.buffer instanceof ArrayBuffer)))) { if (dtype !== 'float32' && dtype !== 'int32') { throw new Error( `Creating tensor from GPU data only supports ` + diff --git a/tfjs-core/src/tensor_util_env.ts b/tfjs-core/src/tensor_util_env.ts index c7705697f6..688c2bc73f 100644 --- a/tfjs-core/src/tensor_util_env.ts +++ b/tfjs-core/src/tensor_util_env.ts @@ -30,10 +30,10 @@ export function inferShape( } const isObject = typeof val === 'object'; if (isObject) { - if ('texture' in val && val.texture instanceof WebGLTexture) { + if ('texture' in val) { const usedChannels = val.channels || 'RGBA'; return [val.height, val.width * usedChannels.length]; - } else if ('buffer' in val && val.buffer instanceof GPUBuffer) { + } else if ('buffer' in val && !(val.buffer instanceof ArrayBuffer)) { if (val.size == null) { throw new Error('size should be defined in WebGPUData!'); } From 
d6d7101c9bc2c38f29d9236a66baf600bd79e10e Mon Sep 17 00:00:00 2001 From: Xu Xing Date: Fri, 11 Nov 2022 13:58:55 +0800 Subject: [PATCH 3/9] Remove size --- tfjs-backend-webgpu/src/backend_webgpu.ts | 4 ++-- .../src/backend_webgpu_test.ts | 24 ++++--------------- tfjs-core/src/ops/tensor.ts | 2 +- tfjs-core/src/tensor_util_env.ts | 6 ++--- tfjs-core/src/types.ts | 1 - 5 files changed, 9 insertions(+), 28 deletions(-) diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts index 44967371d7..81a5eef67a 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu.ts @@ -461,9 +461,9 @@ export class WebGPUBackend extends KernelBackend { const sizeFromShape = util.sizeFromShape(tensorData.shape); const size = webgpu_util.GPUBytesPerElement(tensorData.dtype) * sizeFromShape; - if (values.size < sizeFromShape) { + if (values.buffer.size < sizeFromShape) { throw new Error(`GPUBuffer size(${ - values.size}) is smaller than tensor size(${sizeFromShape})!`); + values.buffer.size}) is smaller than tensor size(${sizeFromShape})!`); } tensorData diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts index 04cee1ec29..5f2ef22c62 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts @@ -415,9 +415,8 @@ async function testCreateTensorFromGPUBuffer( const shape: number[] = [aData.length]; const startNumBytes = tf.memory().numBytes; const startNumTensors = tf.memory().numTensors; - const a = useDefaultShapeAndType ? - tf.tensor({buffer: aBuffer, size: aData.length}) : - tf.tensor({buffer: aBuffer, size: aData.length}, shape, dtype); + const a = useDefaultShapeAndType ? 
tf.tensor({buffer: aBuffer}) : + tf.tensor({buffer: aBuffer}, shape, dtype); const b = tf.tensor(bData, shape, dtype); const result = tf.add(a, b); tf.test_util.expectArraysClose(await result.data(), expected); @@ -444,20 +443,6 @@ describeWebGPU('create tensor from GPUBuffer', () => { await testCreateTensorFromGPUBuffer('int32'); }); - it('throw when size is not set or incorrect', async () => { - const webGPUBackend = tf.backend() as WebGPUBackend; - const device = webGPUBackend.device; - const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - const dtype = 'float32'; - const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype); - const shape: number[] = [aData.length]; - const a = () => tf.tensor({buffer: aBuffer}, shape, dtype); - expect(a).toThrowError(); - const b = () => tf.tensor({buffer: aBuffer, size: 0}, shape, dtype); - expect(b).toThrowError(); - aBuffer.destroy(); - }); - it('two tensors share the same GPUBuffer', async () => { const webGPUBackend = tf.backend() as WebGPUBackend; const device = webGPUBackend.device; @@ -467,9 +452,8 @@ describeWebGPU('create tensor from GPUBuffer', () => { const startNumBytes = tf.memory().numBytes; const startNumTensors = tf.memory().numTensors; const shape: number[] = [aData.length]; - const size = aData.length * 4; - const a = tf.tensor({buffer: aBuffer, size}, shape, dtype); - const b = tf.tensor({buffer: aBuffer, size}, shape, dtype); + const a = tf.tensor({buffer: aBuffer}, shape, dtype); + const b = tf.tensor({buffer: aBuffer}, shape, dtype); const result = tf.add(a, b); const expected = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]; diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts index 507454ee41..37f5a05fb9 100644 --- a/tfjs-core/src/ops/tensor.ts +++ b/tfjs-core/src/ops/tensor.ts @@ -145,7 +145,7 @@ import {makeTensor} from './tensor_ops_util'; * const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20]; * const aBuffer 
= await createReadonlyGPUBufferFromData(device, aData, dtype); * const shape = [aData.length]; - * const a = tf.tensor({buffer: aBuffer, size: aData.length}); + * const a = tf.tensor({buffer: aBuffer}, shape, dtype); * const b = tf.tensor(bData, shape, dtype); * const result = tf.add(a, b); * a.dispose(); diff --git a/tfjs-core/src/tensor_util_env.ts b/tfjs-core/src/tensor_util_env.ts index 688c2bc73f..e7be429742 100644 --- a/tfjs-core/src/tensor_util_env.ts +++ b/tfjs-core/src/tensor_util_env.ts @@ -20,6 +20,7 @@ import {env} from './environment'; import {Tensor} from './tensor'; import {DataType, TensorLike, WebGLData, WebGPUData} from './types'; import {assert, flatten, inferDtype, isTypedArray, toTypedArray} from './util'; +import {bytesPerElement} from './util_base'; export function inferShape( val: TensorLike|WebGLData|WebGPUData, dtype?: DataType): number[] { @@ -34,10 +35,7 @@ export function inferShape( const usedChannels = val.channels || 'RGBA'; return [val.height, val.width * usedChannels.length]; } else if ('buffer' in val && !(val.buffer instanceof ArrayBuffer)) { - if (val.size == null) { - throw new Error('size should be defined in WebGPUData!'); - } - return [val.size]; + return [val.buffer.size / (dtype == null ? 
4 : bytesPerElement(dtype))]; } } if (!Array.isArray(val)) { diff --git a/tfjs-core/src/types.ts b/tfjs-core/src/types.ts index 5bcb4231f6..5c38df4c38 100644 --- a/tfjs-core/src/types.ts +++ b/tfjs-core/src/types.ts @@ -190,5 +190,4 @@ export interface WebGLData { */ export interface WebGPUData { buffer: GPUBuffer; - size: number; } From 2f92be58be8f5565307767ed8a0937fd40a56e74 Mon Sep 17 00:00:00 2001 From: Xu Xing Date: Tue, 15 Nov 2022 15:01:02 +0800 Subject: [PATCH 4/9] Add test case --- tfjs-backend-webgpu/src/backend_webgpu.ts | 12 +++--- .../src/backend_webgpu_test.ts | 39 +++++++++++++++++++ tfjs-core/src/ops/tensor.ts | 4 +- 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts index 81a5eef67a..a79ae32cd1 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu.ts @@ -458,16 +458,14 @@ export class WebGPUBackend extends KernelBackend { this.tensorMap.set( dataId, {dtype, shape, values: null, refCount: 1, external: true}); const tensorData = this.tensorMap.get(dataId); - const sizeFromShape = util.sizeFromShape(tensorData.shape); - const size = - webgpu_util.GPUBytesPerElement(tensorData.dtype) * sizeFromShape; - if (values.buffer.size < sizeFromShape) { + const size = webgpu_util.GPUBytesPerElement(tensorData.dtype) * + util.sizeFromShape(tensorData.shape); + if (values.buffer.size < size) { throw new Error(`GPUBuffer size(${ - values.buffer.size}) is smaller than tensor size(${sizeFromShape})!`); + values.buffer.size}) is smaller than tensor size(${size})!`); } - tensorData - .resourceInfo = {size, usage: this.defaultGpuBufferUsage(), buffer}; + tensorData.resourceInfo = {size: buffer.size, usage: buffer.usage, buffer}; return engine().makeTensorFromDataId(dataId, shape, dtype, this); } diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts index 
5f2ef22c62..ec6c5298bf 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts @@ -467,4 +467,43 @@ describeWebGPU('create tensor from GPUBuffer', () => { expect(endNumTensors - startNumTensors).toEqual(0); aBuffer.destroy(); }); + + it('GPUBuffer size is bigger than tensor size', async () => { + const webGPUBackend = tf.backend() as WebGPUBackend; + const device = webGPUBackend.device; + const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + const dtype = 'float32'; + const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype); + const startNumBytes = tf.memory().numBytes; + const startNumTensors = tf.memory().numTensors; + // GPUBuffer.size is bigger than shape size + const shape: number[] = [aData.length - 1]; + const a = tf.tensor({buffer: aBuffer}, shape, dtype); + const b = tf.tensor({buffer: aBuffer}, shape, dtype); + const result = tf.add(a, b); + const expected = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]; + console.log(await result.data()); + tf.test_util.expectArraysClose(await result.data(), expected); + a.dispose(); + b.dispose(); + result.dispose(); + const endNumBytes = tf.memory().numBytes; + const endNumTensors = tf.memory().numTensors; + expect(endNumBytes - startNumBytes).toEqual(0); + expect(endNumTensors - startNumTensors).toEqual(0); + aBuffer.destroy(); + }); + + it('throw for GPUBuffer size is smaller than tensor size', async () => { + const webGPUBackend = tf.backend() as WebGPUBackend; + const device = webGPUBackend.device; + const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + const dtype = 'float32'; + const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype); + // Throw when GPUBuffer.size is smaller than shape size + const shape: number[] = [aData.length + 1]; + const a = () => tf.tensor({buffer: aBuffer}, shape, dtype); + expect(a).toThrowError(); + aBuffer.destroy(); + }); }); diff --git 
a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts index 37f5a05fb9..82fcf2e01c 100644 --- a/tfjs-core/src/ops/tensor.ts +++ b/tfjs-core/src/ops/tensor.ts @@ -99,7 +99,9 @@ import {makeTensor} from './tensor_ops_util'; * // This makes it possible for TF.js applications to avoid GPU / CPU sync. * // For example, if your application includes a preprocessing step on the GPU, * // you could upload the GPU output directly to TF.js, rather than first - * // downloading the values. + * // downloading the values. Unlike WebGL, to support zero copy, this GPUBuffer + * // is bound directly by the tensor. Do not destroy this GPUBuffer until all + * // access are done. * * // Example for WebGPU: * async function createReadonlyGPUBufferFromData(device, data, dtype) { From fcd7a22c8f6cf6b25d075def7f60c15d4f6ec9e3 Mon Sep 17 00:00:00 2001 From: Xu Xing Date: Tue, 15 Nov 2022 15:49:25 +0800 Subject: [PATCH 5/9] Remove async and fix comments --- .../src/backend_webgpu_test.ts | 11 +++---- tfjs-core/src/ops/tensor.ts | 33 ++++++++++--------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts index ec6c5298bf..6367528a6f 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts @@ -367,7 +367,7 @@ describeWebGPU('keeping data on gpu ', () => { }); }); -async function createReadonlyGPUBufferFromData( +function createReadonlyGPUBufferFromData( device: GPUDevice, data: number[], dtype: tf.DataType) { const bytesPerElement = 4; const sizeInBytes = data.length * bytesPerElement; @@ -411,7 +411,7 @@ async function testCreateTensorFromGPUBuffer( const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]; const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20]; - const aBuffer = await createReadonlyGPUBufferFromData(device, 
aData, dtype); + const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype); const shape: number[] = [aData.length]; const startNumBytes = tf.memory().numBytes; const startNumTensors = tf.memory().numTensors; @@ -448,7 +448,7 @@ describeWebGPU('create tensor from GPUBuffer', () => { const device = webGPUBackend.device; const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; const dtype = 'float32'; - const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype); + const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype); const startNumBytes = tf.memory().numBytes; const startNumTensors = tf.memory().numTensors; const shape: number[] = [aData.length]; @@ -473,7 +473,7 @@ describeWebGPU('create tensor from GPUBuffer', () => { const device = webGPUBackend.device; const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; const dtype = 'float32'; - const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype); + const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype); const startNumBytes = tf.memory().numBytes; const startNumTensors = tf.memory().numTensors; // GPUBuffer.size is bigger than shape size @@ -482,7 +482,6 @@ describeWebGPU('create tensor from GPUBuffer', () => { const b = tf.tensor({buffer: aBuffer}, shape, dtype); const result = tf.add(a, b); const expected = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]; - console.log(await result.data()); tf.test_util.expectArraysClose(await result.data(), expected); a.dispose(); b.dispose(); @@ -499,7 +498,7 @@ describeWebGPU('create tensor from GPUBuffer', () => { const device = webGPUBackend.device; const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; const dtype = 'float32'; - const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype); + const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype); // Throw when GPUBuffer.size is smaller than shape size const shape: 
number[] = [aData.length + 1]; const a = () => tf.tensor({buffer: aBuffer}, shape, dtype); diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts index 82fcf2e01c..80e7569265 100644 --- a/tfjs-core/src/ops/tensor.ts +++ b/tfjs-core/src/ops/tensor.ts @@ -104,7 +104,7 @@ import {makeTensor} from './tensor_ops_util'; * // access are done. * * // Example for WebGPU: - * async function createReadonlyGPUBufferFromData(device, data, dtype) { + * function createReadonlyGPUBufferFromData(device, data, dtype) { * const bytesPerElement = 4; * const sizeInBytes = data.length * bytesPerElement; * @@ -145,7 +145,7 @@ import {makeTensor} from './tensor_ops_util'; * const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; * const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]; * const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20]; - * const aBuffer = await createReadonlyGPUBufferFromData(device, aData, dtype); + * const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype); * const shape = [aData.length]; * const a = tf.tensor({buffer: aBuffer}, shape, dtype); * const b = tf.tensor(bData, shape, dtype); @@ -156,19 +156,22 @@ import {makeTensor} from './tensor_ops_util'; * aBuffer.destroy(); * ``` * @param values The values of the tensor. Can be nested array of numbers, - * or a flat array, or a `TypedArray`, or a `WebGLData` object. If the - * values are strings, they will be encoded as utf-8 and kept as `Uint8Array[]`. - * If the values is a `WebGLData` object, the dtype could only be 'float32' or - * 'int32' and the object has to have: 1. texture, a `WebGLTexture`, the texture - * must share the same `WebGLRenderingContext` with TFJS's WebGL backend (you - * could create a custom WebGL backend from your texture's canvas) and the - * internal texture format for the input texture must be floating point or - * normalized integer; 2. height, the height of the texture; 3. width, the width - * of the texture; 4. 
channels, a non-empty subset of 'RGBA', indicating the - * values of which channels will be passed to the tensor, such as 'R' or 'BR' - * (The order of the channels affect the order of tensor values. ). (If the - * values passed from texture is less than the tensor size, zeros will be padded - * at the rear.) + * or a flat array, or a `TypedArray`, or a `WebGLData` object, or a + * `WebGPUData` object. If the values are strings, they will be encoded as utf-8 + * and kept as `Uint8Array[]`. If the values is a `WebGLData` object, the dtype + * could only be 'float32' or 'int32' and the object has to have: 1. texture, a + * `WebGLTexture`, the texture must share the same `WebGLRenderingContext` with + * TFJS's WebGL backend (you could create a custom WebGL backend from your + * texture's canvas) and the internal texture format for the input texture must + * be floating point or normalized integer; 2. height, the height of the + * texture; 3. width, the width of the texture; 4. channels, a non-empty subset + * of 'RGBA', indicating the values of which channels will be passed to the + * tensor, such as 'R' or 'BR' (The order of the channels affect the order of + * tensor values. ). (If the values passed from texture is less than the tensor + * size, zeros will be padded at the rear.). If the values is a `WebGPUData` + * object, the dtype could only be 'float32' or 'int32 and the object has to + * have: buffer, a `GPUBuffer`, the buffer must share the same `GPUDevice` with + * TFJS's WebGPU backend. * @param shape The shape of the tensor. Optional. If not provided, * it is inferred from `values`. * @param dtype The data type. 
From 76ccb5a4d1fc94e1de317322351e6fc4bb48b097 Mon Sep 17 00:00:00 2001 From: Xu Xing Date: Tue, 15 Nov 2022 16:47:46 +0800 Subject: [PATCH 6/9] Add read case and usage check --- tfjs-backend-webgpu/src/backend_webgpu.ts | 4 ++ .../src/backend_webgpu_test.ts | 65 +++++++++++++++++-- tfjs-core/src/ops/tensor.ts | 7 +- 3 files changed, 67 insertions(+), 9 deletions(-) diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts index a79ae32cd1..7aa8ddfd3e 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu.ts @@ -463,6 +463,10 @@ export class WebGPUBackend extends KernelBackend { if (values.buffer.size < size) { throw new Error(`GPUBuffer size(${ values.buffer.size}) is smaller than tensor size(${size})!`); + } else if ( + (values.buffer.usage & GPUBufferUsage.STORAGE) !== + GPUBufferUsage.STORAGE) { + throw new Error('GPUBuffer.usage should include GPUBufferUsage.STORAGE!'); } tensorData.resourceInfo = {size: buffer.size, usage: buffer.usage, buffer}; diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts index 6367528a6f..0d1af6f3e5 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts @@ -368,7 +368,8 @@ describeWebGPU('keeping data on gpu ', () => { }); function createReadonlyGPUBufferFromData( - device: GPUDevice, data: number[], dtype: tf.DataType) { + device: GPUDevice, data: number[], dtype: tf.DataType, + bufferUsage = GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE) { const bytesPerElement = 4; const sizeInBytes = data.length * bytesPerElement; @@ -389,11 +390,8 @@ function createReadonlyGPUBufferFromData( } gpuWriteBuffer.unmap(); - const gpuReadBuffer = device.createBuffer({ - mappedAtCreation: false, - size: sizeInBytes, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE - }); + const gpuReadBuffer = device.createBuffer( + {mappedAtCreation: false, 
size: sizeInBytes, usage: bufferUsage}); const copyEncoder = device.createCommandEncoder(); copyEncoder.copyBufferToBuffer( @@ -404,6 +402,30 @@ function createReadonlyGPUBufferFromData( return gpuReadBuffer; } +function createStagingGPUBufferFromData( + device: GPUDevice, data: number[], dtype: tf.DataType) { + const bytesPerElement = 4; + const sizeInBytes = data.length * bytesPerElement; + + const gpuWriteBuffer = device.createBuffer({ + mappedAtCreation: true, + size: sizeInBytes, + usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC + }); + const arrayBuffer = gpuWriteBuffer.getMappedRange(); + if (dtype === 'float32') { + new Float32Array(arrayBuffer).set(data); + } else if (dtype === 'int32') { + new Int32Array(arrayBuffer).set(data); + } else { + throw new Error( + `Creating tensor from GPUBuffer only supports` + + `'float32'|'int32' dtype, while the dtype is ${dtype}.`); + } + gpuWriteBuffer.unmap(); + return gpuWriteBuffer; +} + async function testCreateTensorFromGPUBuffer( dtype: tf.DataType, useDefaultShapeAndType = false) { const webGPUBackend = tf.backend() as WebGPUBackend; @@ -443,6 +465,21 @@ describeWebGPU('create tensor from GPUBuffer', () => { await testCreateTensorFromGPUBuffer('int32'); }); + it('work for read', async () => { + const webGPUBackend = tf.backend() as WebGPUBackend; + const device = webGPUBackend.device; + const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + const dtype = 'float32'; + const aBuffer = createReadonlyGPUBufferFromData( + device, aData, dtype, + GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE | + GPUBufferUsage.COPY_SRC); + const shape: number[] = [aData.length]; + const a = tf.tensor({buffer: aBuffer}, shape, dtype); + await a.data(); + aBuffer.destroy(); + }); + it('two tensors share the same GPUBuffer', async () => { const webGPUBackend = tf.backend() as WebGPUBackend; const device = webGPUBackend.device; @@ -493,7 +530,7 @@ describeWebGPU('create tensor from GPUBuffer', () => { 
aBuffer.destroy(); }); - it('throw for GPUBuffer size is smaller than tensor size', async () => { + it('throw when GPUBuffer size is smaller than tensor size', async () => { const webGPUBackend = tf.backend() as WebGPUBackend; const device = webGPUBackend.device; const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; @@ -505,4 +542,18 @@ describeWebGPU('create tensor from GPUBuffer', () => { expect(a).toThrowError(); aBuffer.destroy(); }); + + it('throw when GPUBuffer usage is not correct', async () => { + const webGPUBackend = tf.backend() as WebGPUBackend; + const device = webGPUBackend.device; + const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + const dtype = 'float32'; + // Create a GPUBuffer without GPUBufferUsage.STORAGE. + const aBuffer = createStagingGPUBufferFromData(device, aData, dtype); + // Throw when GPUBuffer usage is not correct. + const shape: number[] = [aData.length]; + const a = () => tf.tensor({buffer: aBuffer}, shape, dtype); + expect(a).toThrowError(); + aBuffer.destroy(); + }); }); diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts index 80e7569265..e9dcd04366 100644 --- a/tfjs-core/src/ops/tensor.ts +++ b/tfjs-core/src/ops/tensor.ts @@ -170,8 +170,11 @@ import {makeTensor} from './tensor_ops_util'; * tensor values. ). (If the values passed from texture is less than the tensor * size, zeros will be padded at the rear.). If the values is a `WebGPUData` * object, the dtype could only be 'float32' or 'int32 and the object has to - * have: buffer, a `GPUBuffer`, the buffer must share the same `GPUDevice` with - * TFJS's WebGPU backend. + * have: buffer, a `GPUBuffer`. The buffer must: 1. share the same `GPUDevice` + * with TFJS's WebGPU backend; 2.buffer.usage should at least support + * GPUBufferUsage.STORAGE, to support tensor.data, GPUBufferUsage.COPY_SRC is + * also required; 3. buffer.size should not be smaller than the byte size of + * tensor shape. 
* @param shape The shape of the tensor. Optional. If not provided, * it is inferred from `values`. * @param dtype The data type. From 54ed5e9038f53ff6a0d073c8d3e74a32e133f933 Mon Sep 17 00:00:00 2001 From: Xu Xing Date: Mon, 21 Nov 2022 15:09:27 +0800 Subject: [PATCH 7/9] Add flag WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY --- tfjs-backend-webgpu/src/backend_webgpu.ts | 39 +++++++++--- .../src/backend_webgpu_test.ts | 63 ++++++++++++++----- tfjs-backend-webgpu/src/flags_webgpu.ts | 8 ++- tfjs-core/src/ops/tensor.ts | 14 ++--- 4 files changed, 91 insertions(+), 33 deletions(-) diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts index 7aa8ddfd3e..99c1cff681 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu.ts @@ -51,8 +51,12 @@ type TensorData = { shape: number[], refCount: number, resourceInfo?: BufferInfo|TextureInfo, - // Indicate the tensor is created from an external GPU resource. - external?: boolean, + // zeroCopy is used for creating tensor from GPUBuffer. When zeroCopy is false + // or undefined (default), this GPUBuffer will be copied to the tensor's + // resource buffer. When zeroCopy is true, tensor will use this GPUBUffer as + // tensor's resource buffer, user should not destroy this GPUBuffer until all + // access are done. + zeroCopy?: boolean, // For complex numbers, the real and imaginary parts are stored as their own // individual tensors, with a parent joining the two with the // complexTensorInfos field. @@ -244,8 +248,9 @@ export class WebGPUBackend extends KernelBackend { if (!tensorData || !tensorData.resourceInfo) { return; } - // If tensor data is from external resource, do not release. - if (tensorData.external) { + // If tensor's resource buffer is from a zero copy GPUBuffer, do not + // release. 
+ if (tensorData.zeroCopy) { tensorData.resourceInfo = null; return; } @@ -445,18 +450,31 @@ export class WebGPUBackend extends KernelBackend { return vals; } + // The source GPUBuffer and destination GPUBuffer have the same size and + // usage. + private copyBuffer(srcBuffer: GPUBuffer, size: number, usage: number) { + const dstBuffer = this.bufferManager.acquireBuffer(size, usage); + this.ensureCommandEncoderReady(); + this.ensureComputePassEnded(); + this.currentCommandEncoder.copyBufferToBuffer( + srcBuffer, 0, dstBuffer, 0, size); + this.submitQueue(); + return dstBuffer; + } + /** * Create a TF.js tensor out of an existing WebGPU buffer. */ override createTensorFromGPUData( values: WebGPUData, shape: number[], dtype: DataType): Tensor { - const buffer = values.buffer; + let buffer = values.buffer; if (dtype === 'complex64') { throw new Error(`Cannot write to a complex64 dtype. `); } const dataId = {id: this.nextDataId()}; + const zeroCopy = env().getBool('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY'); this.tensorMap.set( - dataId, {dtype, shape, values: null, refCount: 1, external: true}); + dataId, {dtype, shape, values: null, refCount: 1, zeroCopy}); const tensorData = this.tensorMap.get(dataId); const size = webgpu_util.GPUBytesPerElement(tensorData.dtype) * util.sizeFromShape(tensorData.shape); @@ -469,6 +487,10 @@ export class WebGPUBackend extends KernelBackend { throw new Error('GPUBuffer.usage should include GPUBufferUsage.STORAGE!'); } + // Do buffer copy by default. + if (zeroCopy === false) { + buffer = this.copyBuffer(buffer, size, buffer.usage); + } tensorData.resourceInfo = {size: buffer.size, usage: buffer.usage, buffer}; return engine().makeTensorFromDataId(dataId, shape, dtype, this); } @@ -659,9 +681,8 @@ export class WebGPUBackend extends KernelBackend { // TODO: WebGPU doesn't support read data synchronously from GPU to CPU. // So it will report error when switching backend from WebGPU to others. 
// There are two situations: 1) swithcing the backend after running a - // model; 2) swithcing the backend within the model. Temporarilly keep the - // values on CPU to solve the first issue. - // tensorData.values = null; + // model; 2) swithcing the backend within the model. Temporarilly keep + // the values on CPU to solve the first issue. tensorData.values = null; } } diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts index 0d1af6f3e5..2076b5d8a1 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts @@ -369,7 +369,8 @@ describeWebGPU('keeping data on gpu ', () => { function createReadonlyGPUBufferFromData( device: GPUDevice, data: number[], dtype: tf.DataType, - bufferUsage = GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE) { + bufferUsage = GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE | + GPUBufferUsage.COPY_SRC) { const bytesPerElement = 4; const sizeInBytes = data.length * bytesPerElement; @@ -427,7 +428,7 @@ function createStagingGPUBufferFromData( } async function testCreateTensorFromGPUBuffer( - dtype: tf.DataType, useDefaultShapeAndType = false) { + dtype: tf.DataType, useDefaultShapeAndType = false, zeroCopy = false) { const webGPUBackend = tf.backend() as WebGPUBackend; const device = webGPUBackend.device; const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; @@ -439,6 +440,9 @@ async function testCreateTensorFromGPUBuffer( const startNumTensors = tf.memory().numTensors; const a = useDefaultShapeAndType ? 
tf.tensor({buffer: aBuffer}) : tf.tensor({buffer: aBuffer}, shape, dtype); + if (zeroCopy !== true) { + aBuffer.destroy(); + } const b = tf.tensor(bData, shape, dtype); const result = tf.add(a, b); tf.test_util.expectArraysClose(await result.data(), expected); @@ -449,20 +453,22 @@ async function testCreateTensorFromGPUBuffer( const endNumTensors = tf.memory().numTensors; expect(endNumBytes - startNumBytes).toEqual(0); expect(endNumTensors - startNumTensors).toEqual(0); - aBuffer.destroy(); + if (zeroCopy === true) { + aBuffer.destroy(); + } } -describeWebGPU('create tensor from GPUBuffer', () => { +function createTensorFromGPUTest(zeroCopy = false) { it('use default shape and data type(float32)', async () => { - await testCreateTensorFromGPUBuffer('float32', true); + await testCreateTensorFromGPUBuffer('float32', true, zeroCopy); }); it('work for float32', async () => { - await testCreateTensorFromGPUBuffer('float32'); + await testCreateTensorFromGPUBuffer('float32', false, zeroCopy); }); it('work for int32', async () => { - await testCreateTensorFromGPUBuffer('int32'); + await testCreateTensorFromGPUBuffer('int32', false, zeroCopy); }); it('work for read', async () => { @@ -470,14 +476,16 @@ describeWebGPU('create tensor from GPUBuffer', () => { const device = webGPUBackend.device; const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; const dtype = 'float32'; - const aBuffer = createReadonlyGPUBufferFromData( - device, aData, dtype, - GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE | - GPUBufferUsage.COPY_SRC); + const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype); const shape: number[] = [aData.length]; const a = tf.tensor({buffer: aBuffer}, shape, dtype); + if (zeroCopy !== true) { + aBuffer.destroy(); + } await a.data(); - aBuffer.destroy(); + if (zeroCopy === true) { + aBuffer.destroy(); + } }); it('two tensors share the same GPUBuffer', async () => { @@ -491,6 +499,9 @@ describeWebGPU('create tensor from GPUBuffer', () => 
{ const shape: number[] = [aData.length]; const a = tf.tensor({buffer: aBuffer}, shape, dtype); const b = tf.tensor({buffer: aBuffer}, shape, dtype); + if (zeroCopy !== true) { + aBuffer.destroy(); + } const result = tf.add(a, b); const expected = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]; @@ -502,7 +513,9 @@ describeWebGPU('create tensor from GPUBuffer', () => { const endNumTensors = tf.memory().numTensors; expect(endNumBytes - startNumBytes).toEqual(0); expect(endNumTensors - startNumTensors).toEqual(0); - aBuffer.destroy(); + if (zeroCopy === true) { + aBuffer.destroy(); + } }); it('GPUBuffer size is bigger than tensor size', async () => { @@ -517,6 +530,9 @@ describeWebGPU('create tensor from GPUBuffer', () => { const shape: number[] = [aData.length - 1]; const a = tf.tensor({buffer: aBuffer}, shape, dtype); const b = tf.tensor({buffer: aBuffer}, shape, dtype); + if (zeroCopy !== true) { + aBuffer.destroy(); + } const result = tf.add(a, b); const expected = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]; tf.test_util.expectArraysClose(await result.data(), expected); @@ -527,7 +543,9 @@ describeWebGPU('create tensor from GPUBuffer', () => { const endNumTensors = tf.memory().numTensors; expect(endNumBytes - startNumBytes).toEqual(0); expect(endNumTensors - startNumTensors).toEqual(0); - aBuffer.destroy(); + if (zeroCopy === true) { + aBuffer.destroy(); + } }); it('throw when GPUBuffer size is smaller than tensor size', async () => { @@ -556,4 +574,21 @@ describeWebGPU('create tensor from GPUBuffer', () => { expect(a).toThrowError(); aBuffer.destroy(); }); +} + +describeWebGPU('create tensor from GPUBuffer', () => { + createTensorFromGPUTest(); +}); + +describeWebGPU('create tensor from GPUBuffer with zero copy', () => { + let savedZeroCopyFlag = false; + beforeAll(() => { + savedZeroCopyFlag = + tf.env().get('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY') as boolean; + tf.env().set('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY', true); + }); 
+ afterAll(() => { + tf.env().set('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY', savedZeroCopyFlag); + }); + createTensorFromGPUTest(true); }); diff --git a/tfjs-backend-webgpu/src/flags_webgpu.ts b/tfjs-backend-webgpu/src/flags_webgpu.ts index f639b67e4d..31ecc1ebc9 100644 --- a/tfjs-backend-webgpu/src/flags_webgpu.ts +++ b/tfjs-backend-webgpu/src/flags_webgpu.ts @@ -76,10 +76,14 @@ ENV.registerFlag('WEBGPU_USE_NAIVE_CONV2D_DEBUG', () => false); * are dispatched, it means the hardware may be in low occupancy. * 0 means it's not set by the user. A default strategy will be applied. */ -ENV.registerFlag( - 'WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL', () => 0); +ENV.registerFlag('WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL', () => 0); /** * Whether we will run im2col as a separate shader for convolution. */ ENV.registerFlag('WEBGPU_CONV_SEPARATE_IM2COL_SHADER', () => false); + +/** + * Whether use zero copy when create tensor from GPUBuffer. + */ +ENV.registerFlag('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY', () => false); diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts index e9dcd04366..6d4ba28871 100644 --- a/tfjs-core/src/ops/tensor.ts +++ b/tfjs-core/src/ops/tensor.ts @@ -99,9 +99,7 @@ import {makeTensor} from './tensor_ops_util'; * // This makes it possible for TF.js applications to avoid GPU / CPU sync. * // For example, if your application includes a preprocessing step on the GPU, * // you could upload the GPU output directly to TF.js, rather than first - * // downloading the values. Unlike WebGL, to support zero copy, this GPUBuffer - * // is bound directly by the tensor. So donot destroy this GPUBuffer until all - * // access are done. + * // downloading the values. 
* * // Example for WebGPU: * function createReadonlyGPUBufferFromData(device, data, dtype) { @@ -128,7 +126,8 @@ import {makeTensor} from './tensor_ops_util'; * const gpuReadBuffer = device.createBuffer({ * mappedAtCreation: false, * size: sizeInBytes, - * usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + * usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE | + * GPUBufferUsage.COPY_SRC * }); * * const copyEncoder = device.createCommandEncoder(); @@ -171,10 +170,9 @@ import {makeTensor} from './tensor_ops_util'; * size, zeros will be padded at the rear.). If the values is a `WebGPUData` * object, the dtype could only be 'float32' or 'int32 and the object has to * have: buffer, a `GPUBuffer`. The buffer must: 1. share the same `GPUDevice` - * with TFJS's WebGPU backend; 2.buffer.usage should at least support - * GPUBufferUsage.STORAGE, to support tensor.data, GPUBufferUsage.COPY_SRC is - * also required; 3. buffer.size should not be smaller than the byte size of - * tensor shape. + * with TFJS's WebGPU backend; 2. buffer.usage should at least support + * GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC; 3. buffer.size should not + * be smaller than the byte size of tensor shape. * @param shape The shape of the tensor. Optional. If not provided, * it is inferred from `values`. * @param dtype The data type. 
From 8a7a39c68ec26c36958d60fa945715583daa18da Mon Sep 17 00:00:00 2001 From: Xu Xing Date: Tue, 22 Nov 2022 09:49:31 +0800 Subject: [PATCH 8/9] Support zeroCopy in WebGPUData --- tfjs-backend-webgpu/src/backend_webgpu.ts | 28 ++++--- .../src/backend_webgpu_test.ts | 78 +++++++------------ tfjs-backend-webgpu/src/flags_webgpu.ts | 5 -- tfjs-core/src/ops/tensor.ts | 18 ++++- tfjs-core/src/types.ts | 11 ++- 5 files changed, 63 insertions(+), 77 deletions(-) diff --git a/tfjs-backend-webgpu/src/backend_webgpu.ts b/tfjs-backend-webgpu/src/backend_webgpu.ts index 99c1cff681..fbe1c7ebf2 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu.ts @@ -51,12 +51,9 @@ type TensorData = { shape: number[], refCount: number, resourceInfo?: BufferInfo|TextureInfo, - // zeroCopy is used for creating tensor from GPUBuffer. When zeroCopy is false - // or undefined (default), this GPUBuffer will be copied to the tensor's - // resource buffer. When zeroCopy is true, tensor will use this GPUBUffer as - // tensor's resource buffer, user should not destroy this GPUBuffer until all - // access are done. - zeroCopy?: boolean, + // external is true means we use the resource provided by users directly + // (without a copy), so users should be responsible for its release. + external?: boolean, // For complex numbers, the real and imaginary parts are stored as their own // individual tensors, with a parent joining the two with the // complexTensorInfos field. @@ -248,9 +245,8 @@ export class WebGPUBackend extends KernelBackend { if (!tensorData || !tensorData.resourceInfo) { return; } - // If tensor's resource buffer is from a zero copy GPUBuffer, do not - // release. - if (tensorData.zeroCopy) { + // If tensor's resource is from external, do not release. + if (tensorData.external) { tensorData.resourceInfo = null; return; } @@ -472,9 +468,9 @@ export class WebGPUBackend extends KernelBackend { throw new Error(`Cannot write to a complex64 dtype. 
`); } const dataId = {id: this.nextDataId()}; - const zeroCopy = env().getBool('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY'); this.tensorMap.set( - dataId, {dtype, shape, values: null, refCount: 1, zeroCopy}); + dataId, + {dtype, shape, values: null, refCount: 1, external: values.zeroCopy}); const tensorData = this.tensorMap.get(dataId); const size = webgpu_util.GPUBytesPerElement(tensorData.dtype) * util.sizeFromShape(tensorData.shape); @@ -482,13 +478,15 @@ export class WebGPUBackend extends KernelBackend { throw new Error(`GPUBuffer size(${ values.buffer.size}) is smaller than tensor size(${size})!`); } else if ( - (values.buffer.usage & GPUBufferUsage.STORAGE) !== - GPUBufferUsage.STORAGE) { - throw new Error('GPUBuffer.usage should include GPUBufferUsage.STORAGE!'); + (values.buffer.usage & + (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) !== + (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) { + throw new Error( + 'GPUBuffer.usage should include GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC!'); } // Do buffer copy by default. 
- if (zeroCopy === false) { + if (values.zeroCopy !== true) { buffer = this.copyBuffer(buffer, size, buffer.usage); } tensorData.resourceInfo = {size: buffer.size, usage: buffer.usage, buffer}; diff --git a/tfjs-backend-webgpu/src/backend_webgpu_test.ts b/tfjs-backend-webgpu/src/backend_webgpu_test.ts index 2076b5d8a1..d41b241ba3 100644 --- a/tfjs-backend-webgpu/src/backend_webgpu_test.ts +++ b/tfjs-backend-webgpu/src/backend_webgpu_test.ts @@ -367,10 +367,8 @@ describeWebGPU('keeping data on gpu ', () => { }); }); -function createReadonlyGPUBufferFromData( - device: GPUDevice, data: number[], dtype: tf.DataType, - bufferUsage = GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE | - GPUBufferUsage.COPY_SRC) { +function createStagingGPUBufferFromData( + device: GPUDevice, data: number[], dtype: tf.DataType) { const bytesPerElement = 4; const sizeInBytes = data.length * bytesPerElement; @@ -390,7 +388,17 @@ function createReadonlyGPUBufferFromData( `'float32'|'int32' dtype, while the dtype is ${dtype}.`); } gpuWriteBuffer.unmap(); + return gpuWriteBuffer; +} +function createGPUBufferFromData( + device: GPUDevice, data: number[], dtype: tf.DataType, + bufferUsage = GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE | + GPUBufferUsage.COPY_SRC) { + const bytesPerElement = 4; + const sizeInBytes = data.length * bytesPerElement; + + const gpuWriteBuffer = createStagingGPUBufferFromData(device, data, dtype); const gpuReadBuffer = device.createBuffer( {mappedAtCreation: false, size: sizeInBytes, usage: bufferUsage}); @@ -403,30 +411,6 @@ function createReadonlyGPUBufferFromData( return gpuReadBuffer; } -function createStagingGPUBufferFromData( - device: GPUDevice, data: number[], dtype: tf.DataType) { - const bytesPerElement = 4; - const sizeInBytes = data.length * bytesPerElement; - - const gpuWriteBuffer = device.createBuffer({ - mappedAtCreation: true, - size: sizeInBytes, - usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC - }); - const arrayBuffer = 
gpuWriteBuffer.getMappedRange(); - if (dtype === 'float32') { - new Float32Array(arrayBuffer).set(data); - } else if (dtype === 'int32') { - new Int32Array(arrayBuffer).set(data); - } else { - throw new Error( - `Creating tensor from GPUBuffer only supports` + - `'float32'|'int32' dtype, while the dtype is ${dtype}.`); - } - gpuWriteBuffer.unmap(); - return gpuWriteBuffer; -} - async function testCreateTensorFromGPUBuffer( dtype: tf.DataType, useDefaultShapeAndType = false, zeroCopy = false) { const webGPUBackend = tf.backend() as WebGPUBackend; @@ -434,12 +418,13 @@ async function testCreateTensorFromGPUBuffer( const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]; const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20]; - const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype); + const aBuffer = createGPUBufferFromData(device, aData, dtype); const shape: number[] = [aData.length]; const startNumBytes = tf.memory().numBytes; const startNumTensors = tf.memory().numTensors; - const a = useDefaultShapeAndType ? tf.tensor({buffer: aBuffer}) : - tf.tensor({buffer: aBuffer}, shape, dtype); + const webGPUData = {buffer: aBuffer, zeroCopy}; + const a = useDefaultShapeAndType ? 
tf.tensor(webGPUData) : + tf.tensor(webGPUData, shape, dtype); if (zeroCopy !== true) { aBuffer.destroy(); } @@ -476,9 +461,9 @@ function createTensorFromGPUTest(zeroCopy = false) { const device = webGPUBackend.device; const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; const dtype = 'float32'; - const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype); + const aBuffer = createGPUBufferFromData(device, aData, dtype); const shape: number[] = [aData.length]; - const a = tf.tensor({buffer: aBuffer}, shape, dtype); + const a = tf.tensor({buffer: aBuffer, zeroCopy}, shape, dtype); if (zeroCopy !== true) { aBuffer.destroy(); } @@ -493,12 +478,13 @@ function createTensorFromGPUTest(zeroCopy = false) { const device = webGPUBackend.device; const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; const dtype = 'float32'; - const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype); + const aBuffer = createGPUBufferFromData(device, aData, dtype); const startNumBytes = tf.memory().numBytes; const startNumTensors = tf.memory().numTensors; const shape: number[] = [aData.length]; - const a = tf.tensor({buffer: aBuffer}, shape, dtype); - const b = tf.tensor({buffer: aBuffer}, shape, dtype); + const webGPUData = {buffer: aBuffer, zeroCopy}; + const a = tf.tensor(webGPUData, shape, dtype); + const b = tf.tensor(webGPUData, shape, dtype); if (zeroCopy !== true) { aBuffer.destroy(); } @@ -523,13 +509,14 @@ function createTensorFromGPUTest(zeroCopy = false) { const device = webGPUBackend.device; const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; const dtype = 'float32'; - const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype); + const aBuffer = createGPUBufferFromData(device, aData, dtype); const startNumBytes = tf.memory().numBytes; const startNumTensors = tf.memory().numTensors; // GPUBuffer.size is bigger than shape size const shape: number[] = [aData.length - 1]; - const a = 
tf.tensor({buffer: aBuffer}, shape, dtype); - const b = tf.tensor({buffer: aBuffer}, shape, dtype); + const webGPUData = {buffer: aBuffer, zeroCopy}; + const a = tf.tensor(webGPUData, shape, dtype); + const b = tf.tensor(webGPUData, shape, dtype); if (zeroCopy !== true) { aBuffer.destroy(); } @@ -553,7 +540,7 @@ function createTensorFromGPUTest(zeroCopy = false) { const device = webGPUBackend.device; const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; const dtype = 'float32'; - const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype); + const aBuffer = createGPUBufferFromData(device, aData, dtype); // Throw when GPUBuffer.size is smaller than shape size const shape: number[] = [aData.length + 1]; const a = () => tf.tensor({buffer: aBuffer}, shape, dtype); @@ -570,7 +557,7 @@ function createTensorFromGPUTest(zeroCopy = false) { const aBuffer = createStagingGPUBufferFromData(device, aData, dtype); // Throw when GPUBuffer usage is not correct. const shape: number[] = [aData.length]; - const a = () => tf.tensor({buffer: aBuffer}, shape, dtype); + const a = () => tf.tensor({buffer: aBuffer, zeroCopy}, shape, dtype); expect(a).toThrowError(); aBuffer.destroy(); }); @@ -581,14 +568,5 @@ describeWebGPU('create tensor from GPUBuffer', () => { }); describeWebGPU('create tensor from GPUBuffer with zero copy', () => { - let savedZeroCopyFlag = false; - beforeAll(() => { - savedZeroCopyFlag = - tf.env().get('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY') as boolean; - tf.env().set('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY', true); - }); - afterAll(() => { - tf.env().set('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY', savedZeroCopyFlag); - }); createTensorFromGPUTest(true); }); diff --git a/tfjs-backend-webgpu/src/flags_webgpu.ts b/tfjs-backend-webgpu/src/flags_webgpu.ts index 31ecc1ebc9..49ab70ffba 100644 --- a/tfjs-backend-webgpu/src/flags_webgpu.ts +++ b/tfjs-backend-webgpu/src/flags_webgpu.ts @@ -82,8 +82,3 @@ 
ENV.registerFlag('WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL', () => 0); * Whether we will run im2col as a separate shader for convolution. */ ENV.registerFlag('WEBGPU_CONV_SEPARATE_IM2COL_SHADER', () => false); - -/** - * Whether use zero copy when create tensor from GPUBuffer. - */ -ENV.registerFlag('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY', () => false); diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts index 6d4ba28871..72ff5bfc11 100644 --- a/tfjs-core/src/ops/tensor.ts +++ b/tfjs-core/src/ops/tensor.ts @@ -99,10 +99,14 @@ import {makeTensor} from './tensor_ops_util'; * // This makes it possible for TF.js applications to avoid GPU / CPU sync. * // For example, if your application includes a preprocessing step on the GPU, * // you could upload the GPU output directly to TF.js, rather than first - * // downloading the values. + * // downloading the values. Unlike WebGL, this optionally supports zero copy + * // by WebGPUData.zeroCopy. When zeroCopy is false or undefined(default), this + * // passing GPUBuffer can be destroyed after tensor is created. When zeroCopy + * // is true, this GPUBuffer is bound directly by the tensor, so donot destroy + * // this GPUBuffer until all access is done. 
* * // Example for WebGPU: - * function createReadonlyGPUBufferFromData(device, data, dtype) { + * function createGPUBufferFromData(device, data, dtype) { * const bytesPerElement = 4; * const sizeInBytes = data.length * bytesPerElement; * @@ -144,8 +148,10 @@ import {makeTensor} from './tensor_ops_util'; * const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; * const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]; * const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20]; - * const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype); + * const aBuffer = createGPUBufferFromData(device, aData, dtype); * const shape = [aData.length]; + * // To use zeroCopy, use {buffer: aBuffer, zeroCopy: true} instead and only + * // destroy aBuffer after all access is done. * const a = tf.tensor({buffer: aBuffer}, shape, dtype); * const b = tf.tensor(bData, shape, dtype); * const result = tf.add(a, b); @@ -172,7 +178,11 @@ import {makeTensor} from './tensor_ops_util'; * have: buffer, a `GPUBuffer`. The buffer must: 1. share the same `GPUDevice` * with TFJS's WebGPU backend; 2. buffer.usage should at least support * GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC; 3. buffer.size should not - * be smaller than the byte size of tensor shape. + * be smaller than the byte size of tensor shape. WebGPUData optionally supports + * zero copy by flag zeroCopy. When zeroCopy is false or undefined(default), + * this passing GPUBuffer can be destroyed after tensor is created. When + * zeroCopy is true, this GPUBuffer is bound directly by the tensor, so donot + * destroy this GPUBuffer until all access is done. * @param shape The shape of the tensor. Optional. If not provided, * it is inferred from `values`. * @param dtype The data type.
diff --git a/tfjs-core/src/types.ts b/tfjs-core/src/types.ts index 5c38df4c38..2d3fe88dda 100644 --- a/tfjs-core/src/types.ts +++ b/tfjs-core/src/types.ts @@ -184,10 +184,15 @@ export interface WebGLData { } /** - * Type for representing a buffer data to create a tensor. Use default usage - * GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC. If not specified at - * creating a tensor, tensor type is float32. + * Type for representing a buffer data to create a tensor. Buffer usage should + * at least support GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC. When + * zeroCopy is false or undefined (default), this GPUBuffer will be copied to + * the tensor's resource buffer. When zeroCopy is true, tensor will use this + * GPUBuffer as tensor's resource buffer, user should not destroy this GPUBuffer + * until all access is done. If not specified at creating a tensor, tensor type + * is float32. */ export interface WebGPUData { buffer: GPUBuffer; + zeroCopy?: boolean; } From fd85685a09dfb326071548e1edc3cb3df2cd6ad4 Mon Sep 17 00:00:00 2001 From: Xu Xing Date: Wed, 23 Nov 2022 07:37:44 +0800 Subject: [PATCH 9/9] Fix donot --- tfjs-core/src/ops/tensor.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tfjs-core/src/ops/tensor.ts b/tfjs-core/src/ops/tensor.ts index 72ff5bfc11..1a6be14cba 100644 --- a/tfjs-core/src/ops/tensor.ts +++ b/tfjs-core/src/ops/tensor.ts @@ -102,7 +102,7 @@ import {makeTensor} from './tensor_ops_util'; * // downloading the values. Unlike WebGL, this optionally supports zero copy * // by WebGPUData.zeroCopy. When zeroCopy is false or undefined(default), this * // passing GPUBuffer can be destroyed after tensor is created. When zeroCopy - * // is true, this GPUBuffer is bound directly by the tensor, so donot destroy + * // is true, this GPUBuffer is bound directly by the tensor, so do not destroy * // this GPUBuffer until all access is done. 
* * // Example for WebGPU: @@ -181,7 +181,7 @@ import {makeTensor} from './tensor_ops_util'; * be smaller than the byte size of tensor shape. WebGPUData optionally supports * zero copy by flag zeroCopy. When zeroCopy is false or undefined(default), * this passing GPUBuffer can be destroyed after tensor is created. When - * zeroCopy is true, this GPUBuffer is bound directly by the tensor, so donot + * zeroCopy is true, this GPUBuffer is bound directly by the tensor, so do not * destroy this GPUBuffer until all access is done. * @param shape The shape of the tensor. Optional. If not provided, * it is inferred from `values`.