[ROCm] Fixing RNNForwardV2 autotuning loop and unit tests #35230

Merged
1 change: 0 additions & 1 deletion tensorflow/python/keras/BUILD
@@ -819,7 +819,6 @@ cuda_py_test(
],
shard_count = 4,
tags = [
"no_rocm",
"no_windows_gpu",
],
xla_enable_strict_auto_jit = True,
140 changes: 100 additions & 40 deletions tensorflow/python/keras/layers/cudnn_recurrent.py
@@ -31,6 +31,7 @@
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_cudnn_rnn_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.platform import build_info
from tensorflow.python.util.tf_export import keras_export


@@ -274,24 +275,51 @@ def _process_batch(self, inputs, initial_state):
input_h = initial_state[0]
input_h = array_ops.expand_dims(input_h, axis=0)

params = recurrent_v2._canonical_to_params( # pylint: disable=protected-access
weights=[
self.kernel[:, self.units:self.units * 2],
self.kernel[:, :self.units],
self.kernel[:, self.units * 2:],
self.recurrent_kernel[:, self.units:self.units * 2],
self.recurrent_kernel[:, :self.units],
self.recurrent_kernel[:, self.units * 2:],
],
biases=[
self.bias[self.units:self.units * 2],
self.bias[:self.units],
self.bias[self.units * 2:self.units * 3],
self.bias[self.units * 4:self.units * 5],
self.bias[self.units * 3:self.units * 4],
self.bias[self.units * 5:],
],
shape=self._vector_shape)
params = []
if build_info.is_rocm_build:
# ROCm MIOpen's weight sequence for GRU matches the canonical format but
# differs from cuDNN's:
# MIOpen/canonical: [z, r, h]; cuDNN: [r, z, h]
# z: update gate weights.
# r: reset gate weights.
# h: output gate weights.
params = recurrent_v2._canonical_to_params( # pylint: disable=protected-access
weights=[
self.kernel[:, :self.units],
self.kernel[:, self.units:self.units * 2],
self.kernel[:, self.units * 2:],
self.recurrent_kernel[:, :self.units],
self.recurrent_kernel[:, self.units:self.units * 2],
self.recurrent_kernel[:, self.units * 2:],
],
biases=[
self.bias[:self.units],
self.bias[self.units:self.units * 2],
self.bias[self.units * 2:self.units * 3],
self.bias[self.units * 3:self.units * 4],
self.bias[self.units * 4:self.units * 5],
self.bias[self.units * 5:],
],
shape=self._vector_shape)
else:
params = recurrent_v2._canonical_to_params( # pylint: disable=protected-access
weights=[
self.kernel[:, self.units:self.units * 2],
self.kernel[:, :self.units],
self.kernel[:, self.units * 2:],
self.recurrent_kernel[:, self.units:self.units * 2],
self.recurrent_kernel[:, :self.units],
self.recurrent_kernel[:, self.units * 2:],
],
biases=[
self.bias[self.units:self.units * 2],
self.bias[:self.units],
self.bias[self.units * 2:self.units * 3],
self.bias[self.units * 4:self.units * 5],
self.bias[self.units * 3:self.units * 4],
self.bias[self.units * 5:],
],
shape=self._vector_shape)

args = {
'input': inputs,
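The GRU reordering above is easiest to see in isolation with a minimal NumPy sketch. This is illustrative only, not TensorFlow code: `units`, `input_dim`, and the random kernel are stand-ins for the layer's attributes, and it assumes `_canonical_to_params` essentially flattens each slice and concatenates them into one opaque buffer.

import numpy as np

units = 3
input_dim = 4
kernel = np.random.rand(input_dim, 3 * units)  # canonical Keras layout: [z | r | h]

z, r, h = (kernel[:, g * units:(g + 1) * units] for g in range(3))

miopen_order = [z, r, h]  # MIOpen consumes the canonical [z, r, h] order
cudnn_order = [r, z, h]   # cuDNN expects the reset gate first

# Assuming the flat parameter buffer is built by flattening and
# concatenating the slices, the gate order chosen here fixes its layout:
cudnn_params = np.concatenate([w.reshape(-1) for w in cudnn_order])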
@@ -472,28 +500,60 @@ def _process_batch(self, inputs, initial_state):
input_h = array_ops.expand_dims(input_h, axis=0)
input_c = array_ops.expand_dims(input_c, axis=0)

params = recurrent_v2._canonical_to_params( # pylint: disable=protected-access
weights=[
self.kernel[:, :self.units],
self.kernel[:, self.units:self.units * 2],
self.kernel[:, self.units * 2:self.units * 3],
self.kernel[:, self.units * 3:],
self.recurrent_kernel[:, :self.units],
self.recurrent_kernel[:, self.units:self.units * 2],
self.recurrent_kernel[:, self.units * 2:self.units * 3],
self.recurrent_kernel[:, self.units * 3:],
],
biases=[
self.bias[:self.units],
self.bias[self.units:self.units * 2],
self.bias[self.units * 2:self.units * 3],
self.bias[self.units * 3:self.units * 4],
self.bias[self.units * 4:self.units * 5],
self.bias[self.units * 5:self.units * 6],
self.bias[self.units * 6:self.units * 7],
self.bias[self.units * 7:],
],
shape=self._vector_shape)
params = []
if build_info.is_rocm_build:
# ROCm MIOpen's weight sequence for LSTM differs from both the canonical
# and the cuDNN format:
# MIOpen: [i, f, o, c]; cuDNN/canonical: [i, f, c, o]
# i: input gate weights.
# f: forget gate weights.
# o: output gate weights.
# c: cell gate weights.
params = recurrent_v2._canonical_to_params( # pylint: disable=protected-access
weights=[
self.kernel[:, :self.units],
self.kernel[:, self.units:self.units * 2],
self.kernel[:, self.units * 3:],
self.kernel[:, self.units * 2:self.units * 3],
self.recurrent_kernel[:, :self.units],
self.recurrent_kernel[:, self.units:self.units * 2],
self.recurrent_kernel[:, self.units * 3:],
self.recurrent_kernel[:, self.units * 2:self.units * 3],
],
biases=[
self.bias[:self.units],
self.bias[self.units:self.units * 2],
self.bias[self.units * 3:self.units * 4],
self.bias[self.units * 2:self.units * 3],
self.bias[self.units * 4:self.units * 5],
self.bias[self.units * 5:self.units * 6],
self.bias[self.units * 7:],
self.bias[self.units * 6:self.units * 7],
],
shape=self._vector_shape)
else:
params = recurrent_v2._canonical_to_params( # pylint: disable=protected-access
weights=[
self.kernel[:, :self.units],
self.kernel[:, self.units:self.units * 2],
self.kernel[:, self.units * 2:self.units * 3],
self.kernel[:, self.units * 3:],
self.recurrent_kernel[:, :self.units],
self.recurrent_kernel[:, self.units:self.units * 2],
self.recurrent_kernel[:, self.units * 2:self.units * 3],
self.recurrent_kernel[:, self.units * 3:],
],
biases=[
self.bias[:self.units],
self.bias[self.units:self.units * 2],
self.bias[self.units * 2:self.units * 3],
self.bias[self.units * 3:self.units * 4],
self.bias[self.units * 4:self.units * 5],
self.bias[self.units * 5:self.units * 6],
self.bias[self.units * 6:self.units * 7],
self.bias[self.units * 7:],
],
shape=self._vector_shape)

args = {
'input': inputs,
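As with the GRU branch, a small NumPy sketch shows the LSTM permutation (again illustrative stand-ins, not TensorFlow code, under the same flatten-and-concatenate assumption). Starting from the canonical Keras [i, f, c, o] column layout, MIOpen only swaps the last two gates:

import numpy as np

units = 3
input_dim = 4
kernel = np.random.rand(input_dim, 4 * units)  # canonical Keras layout: [i | f | c | o]

i, f, c, o = (kernel[:, g * units:(g + 1) * units] for g in range(4))

cudnn_order = [i, f, c, o]   # cuDNN matches the canonical order
miopen_order = [i, f, o, c]  # MIOpen swaps the cell and output gates

# The swap changes where the o and c blocks land in the parameter buffer:
miopen_params = np.concatenate([w.reshape(-1) for w in miopen_order])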
2 changes: 2 additions & 0 deletions tensorflow/python/keras/layers/lstm_v2_test.py
@@ -570,6 +570,8 @@ def test_return_sequences_LSTM(self):
input_shape=(num_samples, timesteps, embedding_dim))

def test_float64_LSTM(self):
if test.is_built_with_rocm():
  self.skipTest("Double type is not yet supported in ROCm")
num_samples = 2
timesteps = 3
embedding_dim = 4