Support zeroCopy in WebGPUData
axinging committed Nov 22, 2022
1 parent 54ed5e9 commit 8a7a39c
Showing 5 changed files with 63 additions and 77 deletions.
28 changes: 13 additions & 15 deletions tfjs-backend-webgpu/src/backend_webgpu.ts
@@ -51,12 +51,9 @@ type TensorData = {
shape: number[],
refCount: number,
resourceInfo?: BufferInfo|TextureInfo,
// zeroCopy is used for creating tensor from GPUBuffer. When zeroCopy is false
// or undefined (default), this GPUBuffer will be copied to the tensor's
// resource buffer. When zeroCopy is true, tensor will use this GPUBUffer as
// tensor's resource buffer, user should not destroy this GPUBuffer until all
// access are done.
zeroCopy?: boolean,
// When external is true, the tensor uses the resource provided by the user
// directly (without a copy), so the user is responsible for releasing it.
external?: boolean,
// For complex numbers, the real and imaginary parts are stored as their own
// individual tensors, with a parent joining the two with the
// complexTensorInfos field.
@@ -248,9 +245,8 @@ export class WebGPUBackend extends KernelBackend {
if (!tensorData || !tensorData.resourceInfo) {
return;
}
// If tensor's resource buffer is from a zero copy GPUBuffer, do not
// release.
if (tensorData.zeroCopy) {
// If the tensor's resource is external, do not release it.
if (tensorData.external) {
tensorData.resourceInfo = null;
return;
}
@@ -472,23 +468,25 @@
throw new Error(`Cannot write to a complex64 dtype. `);
}
const dataId = {id: this.nextDataId()};
const zeroCopy = env().getBool('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY');
this.tensorMap.set(
dataId, {dtype, shape, values: null, refCount: 1, zeroCopy});
dataId,
{dtype, shape, values: null, refCount: 1, external: values.zeroCopy});
const tensorData = this.tensorMap.get(dataId);
const size = webgpu_util.GPUBytesPerElement(tensorData.dtype) *
util.sizeFromShape(tensorData.shape);
if (values.buffer.size < size) {
throw new Error(`GPUBuffer size(${
values.buffer.size}) is smaller than tensor size(${size})!`);
} else if (
(values.buffer.usage & GPUBufferUsage.STORAGE) !==
GPUBufferUsage.STORAGE) {
throw new Error('GPUBuffer.usage should include GPUBufferUsage.STORAGE!');
(values.buffer.usage &
(GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) !==
(GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) {
throw new Error(
'GPUBuffer.usage should include GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC!');
}

// Do buffer copy by default.
if (zeroCopy === false) {
if (values.zeroCopy !== true) {
buffer = this.copyBuffer(buffer, size, buffer.usage);
}
tensorData.resourceInfo = {size: buffer.size, usage: buffer.usage, buffer};
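To illustrate the write() path changed above, here is a minimal sketch (not part of this commit) of creating a tensor from a GPUBuffer in the default copy mode. It assumes the WebGPU backend is already active; the helper name tensorFromGPUBuffer is hypothetical. The buffer usage must include GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC and its size must cover the tensor, otherwise write() throws; since zeroCopy is left unset, the backend copies the buffer, so it can be destroyed as soon as the tensor is created.

import * as tf from '@tensorflow/tfjs-core';
import {WebGPUBackend} from '@tensorflow/tfjs-backend-webgpu';

// Hypothetical helper: build a float32 tensor from a freshly written GPUBuffer.
function tensorFromGPUBuffer(data: number[]): tf.Tensor {
  const device = (tf.backend() as WebGPUBackend).device;
  const sizeInBytes = data.length * 4;  // 4 bytes per float32 element
  // Usage must include STORAGE | COPY_SRC; COPY_DST mirrors the default usage
  // of the test helper below.
  const buffer = device.createBuffer({
    mappedAtCreation: true,
    size: sizeInBytes,
    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC |
        GPUBufferUsage.COPY_DST
  });
  new Float32Array(buffer.getMappedRange()).set(data);
  buffer.unmap();
  // zeroCopy is undefined, so the backend copies the buffer; it is safe to
  // destroy it once the tensor exists.
  const t = tf.tensor({buffer}, [data.length], 'float32');
  buffer.destroy();
  return t;
}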
78 changes: 28 additions & 50 deletions tfjs-backend-webgpu/src/backend_webgpu_test.ts
@@ -367,10 +367,8 @@ describeWebGPU('keeping data on gpu ', () => {
});
});

function createReadonlyGPUBufferFromData(
device: GPUDevice, data: number[], dtype: tf.DataType,
bufferUsage = GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE |
GPUBufferUsage.COPY_SRC) {
function createStagingGPUBufferFromData(
device: GPUDevice, data: number[], dtype: tf.DataType) {
const bytesPerElement = 4;
const sizeInBytes = data.length * bytesPerElement;

@@ -390,7 +388,17 @@ function createReadonlyGPUBufferFromData(
`'float32'|'int32' dtype, while the dtype is ${dtype}.`);
}
gpuWriteBuffer.unmap();
return gpuWriteBuffer;
}

function createGPUBufferFromData(
device: GPUDevice, data: number[], dtype: tf.DataType,
bufferUsage = GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE |
GPUBufferUsage.COPY_SRC) {
const bytesPerElement = 4;
const sizeInBytes = data.length * bytesPerElement;

const gpuWriteBuffer = createStagingGPUBufferFromData(device, data, dtype);
const gpuReadBuffer = device.createBuffer(
{mappedAtCreation: false, size: sizeInBytes, usage: bufferUsage});

@@ -403,43 +411,20 @@ function createReadonlyGPUBufferFromData(
return gpuReadBuffer;
}

function createStagingGPUBufferFromData(
device: GPUDevice, data: number[], dtype: tf.DataType) {
const bytesPerElement = 4;
const sizeInBytes = data.length * bytesPerElement;

const gpuWriteBuffer = device.createBuffer({
mappedAtCreation: true,
size: sizeInBytes,
usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC
});
const arrayBuffer = gpuWriteBuffer.getMappedRange();
if (dtype === 'float32') {
new Float32Array(arrayBuffer).set(data);
} else if (dtype === 'int32') {
new Int32Array(arrayBuffer).set(data);
} else {
throw new Error(
`Creating tensor from GPUBuffer only supports` +
`'float32'|'int32' dtype, while the dtype is ${dtype}.`);
}
gpuWriteBuffer.unmap();
return gpuWriteBuffer;
}

async function testCreateTensorFromGPUBuffer(
dtype: tf.DataType, useDefaultShapeAndType = false, zeroCopy = false) {
const webGPUBackend = tf.backend() as WebGPUBackend;
const device = webGPUBackend.device;
const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4];
const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20];
const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
const aBuffer = createGPUBufferFromData(device, aData, dtype);
const shape: number[] = [aData.length];
const startNumBytes = tf.memory().numBytes;
const startNumTensors = tf.memory().numTensors;
const a = useDefaultShapeAndType ? tf.tensor({buffer: aBuffer}) :
tf.tensor({buffer: aBuffer}, shape, dtype);
const webGPUData = {buffer: aBuffer, zeroCopy};
const a = useDefaultShapeAndType ? tf.tensor(webGPUData) :
tf.tensor(webGPUData, shape, dtype);
if (zeroCopy !== true) {
aBuffer.destroy();
}
@@ -476,9 +461,9 @@ function createTensorFromGPUTest(zeroCopy = false) {
const device = webGPUBackend.device;
const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
const dtype = 'float32';
const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
const aBuffer = createGPUBufferFromData(device, aData, dtype);
const shape: number[] = [aData.length];
const a = tf.tensor({buffer: aBuffer}, shape, dtype);
const a = tf.tensor({buffer: aBuffer, zeroCopy}, shape, dtype);
if (zeroCopy !== true) {
aBuffer.destroy();
}
@@ -493,12 +478,13 @@ function createTensorFromGPUTest(zeroCopy = false) {
const device = webGPUBackend.device;
const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
const dtype = 'float32';
const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
const aBuffer = createGPUBufferFromData(device, aData, dtype);
const startNumBytes = tf.memory().numBytes;
const startNumTensors = tf.memory().numTensors;
const shape: number[] = [aData.length];
const a = tf.tensor({buffer: aBuffer}, shape, dtype);
const b = tf.tensor({buffer: aBuffer}, shape, dtype);
const webGPUData = {buffer: aBuffer, zeroCopy};
const a = tf.tensor(webGPUData, shape, dtype);
const b = tf.tensor(webGPUData, shape, dtype);
if (zeroCopy !== true) {
aBuffer.destroy();
}
@@ -523,13 +509,14 @@ function createTensorFromGPUTest(zeroCopy = false) {
const device = webGPUBackend.device;
const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
const dtype = 'float32';
const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
const aBuffer = createGPUBufferFromData(device, aData, dtype);
const startNumBytes = tf.memory().numBytes;
const startNumTensors = tf.memory().numTensors;
// GPUBuffer.size is bigger than shape size
const shape: number[] = [aData.length - 1];
const a = tf.tensor({buffer: aBuffer}, shape, dtype);
const b = tf.tensor({buffer: aBuffer}, shape, dtype);
const webGPUData = {buffer: aBuffer, zeroCopy};
const a = tf.tensor(webGPUData, shape, dtype);
const b = tf.tensor(webGPUData, shape, dtype);
if (zeroCopy !== true) {
aBuffer.destroy();
}
@@ -553,7 +540,7 @@ function createTensorFromGPUTest(zeroCopy = false) {
const device = webGPUBackend.device;
const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
const dtype = 'float32';
const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
const aBuffer = createGPUBufferFromData(device, aData, dtype);
// Throw when GPUBuffer.size is smaller than shape size
const shape: number[] = [aData.length + 1];
const a = () => tf.tensor({buffer: aBuffer}, shape, dtype);
@@ -570,7 +557,7 @@ function createTensorFromGPUTest(zeroCopy = false) {
const aBuffer = createStagingGPUBufferFromData(device, aData, dtype);
// Throw when GPUBuffer usage is not correct.
const shape: number[] = [aData.length];
const a = () => tf.tensor({buffer: aBuffer}, shape, dtype);
const a = () => tf.tensor({buffer: aBuffer, zeroCopy}, shape, dtype);
expect(a).toThrowError();
aBuffer.destroy();
});
@@ -581,14 +568,5 @@ describeWebGPU('create tensor from GPUBuffer', () => {
});

describeWebGPU('create tensor from GPUBuffer with zero copy', () => {
let savedZeroCopyFlag = false;
beforeAll(() => {
savedZeroCopyFlag =
tf.env().get('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY') as boolean;
tf.env().set('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY', true);
});
afterAll(() => {
tf.env().set('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY', savedZeroCopyFlag);
});
createTensorFromGPUTest(true);
});
5 changes: 0 additions & 5 deletions tfjs-backend-webgpu/src/flags_webgpu.ts
@@ -82,8 +82,3 @@ ENV.registerFlag('WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL', () => 0);
* Whether we will run im2col as a separate shader for convolution.
*/
ENV.registerFlag('WEBGPU_CONV_SEPARATE_IM2COL_SHADER', () => false);

/**
* Whether use zero copy when create tensor from GPUBuffer.
*/
ENV.registerFlag('WEBGPU_TENSOR_FROM_BUFFER_WITH_ZERO_COPY', () => false);
18 changes: 14 additions & 4 deletions tfjs-core/src/ops/tensor.ts
@@ -99,10 +99,14 @@ import {makeTensor} from './tensor_ops_util';
* // This makes it possible for TF.js applications to avoid GPU / CPU sync.
* // For example, if your application includes a preprocessing step on the GPU,
* // you could upload the GPU output directly to TF.js, rather than first
* // downloading the values.
* // downloading the values. Unlike WebGL, this optionally supports zero copy
* // via WebGPUData.zeroCopy. When zeroCopy is false or undefined (default),
* // the passed GPUBuffer can be destroyed after the tensor is created. When
* // zeroCopy is true, the GPUBuffer is bound directly by the tensor, so do not
* // destroy the GPUBuffer until all access is done.
*
* // Example for WebGPU:
* function createReadonlyGPUBufferFromData(device, data, dtype) {
* function createGPUBufferFromData(device, data, dtype) {
* const bytesPerElement = 4;
* const sizeInBytes = data.length * bytesPerElement;
*
@@ -144,8 +148,10 @@ import {makeTensor} from './tensor_ops_util';
* const aData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
* const bData = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4];
* const expected = [2, 4, 6, 8, 6, 8, 10, 12, 10, 12, 14, 16, 14, 16, 18, 20];
* const aBuffer = createReadonlyGPUBufferFromData(device, aData, dtype);
* const aBuffer = createGPUBufferFromData(device, aData, dtype);
* const shape = [aData.length];
* // To use zeroCopy, pass {buffer: aBuffer, zeroCopy: true} instead, and do not
* // destroy aBuffer until all access is done.
* const a = tf.tensor({buffer: aBuffer}, shape, dtype);
* const b = tf.tensor(bData, shape, dtype);
* const result = tf.add(a, b);
@@ -172,7 +178,11 @@ import {makeTensor} from './tensor_ops_util';
* have: buffer, a `GPUBuffer`. The buffer must: 1. share the same `GPUDevice`
* with TFJS's WebGPU backend; 2. buffer.usage should at least support
* GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC; 3. buffer.size should not
* be smaller than the byte size of tensor shape.
* be smaller than the byte size of tensor shape. WebGPUData optionally supports
* zero copy via the zeroCopy flag. When zeroCopy is false or undefined
* (default), the passed GPUBuffer can be destroyed after the tensor is created.
* When zeroCopy is true, the GPUBuffer is bound directly by the tensor, so do
* not destroy the GPUBuffer until all access is done.
* @param shape The shape of the tensor. Optional. If not provided,
* it is inferred from `values`.
* @param dtype The data type.
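As a hedged sketch of the zeroCopy variant the doc comment above points to (reusing aBuffer, shape, dtype, bData and expected from that example, and assuming a context where await is available): with zeroCopy set to true the tensor binds aBuffer directly, so the buffer must outlive every read.

// Zero-copy variant of the doc example: the tensor binds aBuffer directly.
const a = tf.tensor({buffer: aBuffer, zeroCopy: true}, shape, dtype);
const b = tf.tensor(bData, shape, dtype);
const result = tf.add(a, b);
const values = await result.data();  // aBuffer must still be alive for this read
console.log(values, expected);
a.dispose();
b.dispose();
result.dispose();
aBuffer.destroy();  // safe now: all access to the zero-copy tensor is done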
11 changes: 8 additions & 3 deletions tfjs-core/src/types.ts
@@ -184,10 +184,15 @@ export interface WebGLData {
}

/**
* Type for representing a buffer data to create a tensor. Use default usage
* GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC. If not specified at
* creating a tensor, tensor type is float32.
* Type for representing buffer data used to create a tensor. The buffer usage
* should at least support GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC.
* When zeroCopy is false or undefined (default), this GPUBuffer will be copied
* to the tensor's resource buffer. When zeroCopy is true, the tensor will use
* this GPUBuffer as its resource buffer, and the user should not destroy this
* GPUBuffer until all access is done. If dtype is not specified when creating
* the tensor, it defaults to float32.
*/
export interface WebGPUData {
buffer: GPUBuffer;
zeroCopy?: boolean;
}
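A rough illustration of the two modes this interface enables, assuming WebGPUData is exported from tfjs-core and that buf is a live GPUBuffer with at least STORAGE | COPY_SRC usage; the helper name describeModes is hypothetical.

import {WebGPUData} from '@tensorflow/tfjs-core';

// Hypothetical helper contrasting the two WebGPUData modes.
function describeModes(buf: GPUBuffer): {copied: WebGPUData, shared: WebGPUData} {
  // Copy mode (default): the backend copies buf, so it may be destroyed once
  // the tensor has been created.
  const copied: WebGPUData = {buffer: buf};
  // Zero-copy mode: the tensor binds buf directly; keep buf alive until every
  // read of the tensor has completed.
  const shared: WebGPUData = {buffer: buf, zeroCopy: true};
  return {copied, shared};
}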
