-
Notifications
You must be signed in to change notification settings - Fork 4
/
model.py
145 lines (111 loc) · 4.54 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import keras.backend as K
from keras.models import Model
from keras.layers import Input
from keras.layers import TimeDistributed
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Bidirectional
from keras.layers import Lambda
from keras.layers import Dropout
from keras.regularizers import l2
def ler(y_true, y_pred, **kwargs):
    """ Label Error Rate

    Averages the edit distance between each predicted sequence and its
    reference sequence.

    # Arguments
        y_true: reference sequences (passed as the `truth` of
            tf.edit_distance)
        y_pred: predicted sequences (passed as the `hypothesis` of
            tf.edit_distance)
        kwargs: forwarded untouched to tf.edit_distance

    # Outputs
        The mean of the distances returned by tf.edit_distance
    """
    distances = tf.edit_distance(y_pred, y_true, **kwargs)
    return tf.reduce_mean(distances)
def decode(inputs, **kwargs):
    """ Decodes a sequence of per-timestep class scores, keeping the single
    best path found by the chosen CTC decoder

    # Arguments
        is_greedy: if True (default) the greedy decoder is used; otherwise
            the beam search decoder is used
        beam_width: beam search only (default 400)
        top_paths: beam search only (default 1)
        merge_repeated: beam search only (default True)
        See the documentation of tf.nn.ctc_beam_search_decoder for the
        meaning of the beam search options

    # Inputs
        A tuple (y_pred, seq_len) where:
            y_pred is a tensor (N, T, C) where N is the batch size, T is the
                maximum timestep and C is the number of classes (including
                the blank label)
            seq_len is a 2-D tensor whose first column holds the real number
                of timesteps of each sequence

    # Outputs
        A sparse tensor with the top path decoded sequence
    """
    # Little hack for load_model: TensorFlow is imported inside the function
    # (presumably so the name resolves when the Lambda is rebuilt -- confirm)
    import tensorflow as tf

    use_greedy = kwargs.get('is_greedy', True)
    scores, lengths = inputs

    # Only column 0 of the length input is used, as int32
    lengths = tf.cast(lengths[:, 0], tf.int32)
    # The decoders are fed time-major data: (N, T, C) -> (T, N, C)
    time_major_scores = tf.transpose(scores, perm=[1, 0, 2])

    if not use_greedy:
        width = kwargs.get('beam_width', 400)
        num_paths = kwargs.get('top_paths', 1)
        merge = kwargs.get('merge_repeated', True)
        beam_result = tf.nn.ctc_beam_search_decoder(time_major_scores,
                                                    lengths,
                                                    width,
                                                    num_paths,
                                                    merge)
        # First element holds the decoded paths; keep the first path only
        return beam_result[0][0]

    greedy_result = tf.nn.ctc_greedy_decoder(time_major_scores, lengths)
    # First element holds the decoded paths; keep the first path only
    return greedy_result[0][0]
def decode_output_shape(inputs_shape):
    """ Output shape of the decoder layer

    # Inputs
        inputs_shape: a pair (y_pred_shape, seq_len_shape) with the shapes of
            the two tensors given to `decode`

    # Outputs
        A flat shape tuple (N, None): the first dimension of y_pred followed
        by an unknown second dimension
    """
    y_pred_shape, _ = inputs_shape
    # Index (not slice) the first dimension: `y_pred_shape[:1]` is itself a
    # tuple and would yield the nested shape ((N,), None) instead of (N, None)
    return (y_pred_shape[0], None)
def ctc_lambda_func(args):
    """ CTC cost function

    # Inputs
        args: a sequence (y_pred, labels, inputs_length) where:
            y_pred is transposed with perm [1, 0, 2] before being handed to
                tf.nn.ctc_loss
            labels is passed to tf.nn.ctc_loss unchanged
            inputs_length is a 2-D tensor; only its first column is used as
                the sequence lengths

    # Outputs
        The value returned by tf.nn.ctc_loss
    """
    y_pred, labels, inputs_length = args

    # Little hack for load_model
    import tensorflow as tf

    time_major_pred = tf.transpose(y_pred, perm=[1, 0, 2])
    sequence_length = inputs_length[:, 0]
    return tf.nn.ctc_loss(labels, time_major_pred, sequence_length)
def ctc_dummy_loss(y_true, y_pred):
    """ Little hack to make CTC working with Keras

    Pass-through loss: `y_true` is ignored and `y_pred` is returned as is.
    """
    loss = y_pred
    return loss
def decoder_dummy_loss(y_true, y_pred):
    """ Little hack to make CTC working with Keras

    Both arguments are ignored; the result is always `K.zeros` of shape (1,).
    """
    zero_shape = (1,)
    return K.zeros(zero_shape)
def sbrt2017(num_hiddens, var_dropout, dropout, weight_decay, num_features=39,
             num_classes=28):
    """ SBRT model

    A bidirectional LSTM followed by a time-distributed dense layer. The
    CTC loss and the greedy decoder are attached as Lambda layers, so the
    returned model outputs both of them.

    # Arguments
        num_hiddens: number of units given to the LSTM wrapped by
            Bidirectional
        var_dropout: rate passed to the LSTM as dropout_W and dropout_U
        dropout: rate of the Dropout layers placed before and after the
            recurrent layer; they are only added when it is greater than 0
        weight_decay: l2 factor used for the LSTM and Dense regularizers
        num_features: size of the last axis of the 'inputs' tensor
        num_classes: number of units of the dense layer

    # Outputs
        A Model whose inputs are [inputs, labels, inputs_length] and whose
        outputs are [ctc loss, decoded sequence]

    Reference:
        [1] Gal, Y, "A Theoretically Grounded Application of Dropout in
        Recurrent Neural Networks", 2015.
        [2] Graves, Alex, Abdel-rahman Mohamed, and Geoffrey Hinton. "Speech
        recognition with deep recurrent neural networks", 2013.
        [3] Wu, Yonghui, et al. "Google's Neural Machine Translation System:
        Bridging the Gap between Human and Machine Translation.", 2016.
    """
    def _maybe_dropout(tensor):
        # A Dropout layer is inserted only for a strictly positive rate
        if dropout > 0.0:
            return Dropout(dropout)(tensor)
        return tensor

    features = Input(name='inputs', shape=(None, num_features))
    hidden = _maybe_dropout(features)

    # Each weight gets its own l2 instance rather than a shared one
    recurrent = LSTM(num_hiddens,
                     return_sequences=True,
                     W_regularizer=l2(weight_decay),
                     U_regularizer=l2(weight_decay),
                     dropout_W=var_dropout,
                     dropout_U=var_dropout,
                     consume_less='gpu')
    hidden = Bidirectional(recurrent)(hidden)
    hidden = _maybe_dropout(hidden)

    classifier = Dense(num_classes, W_regularizer=l2(weight_decay))
    scores = TimeDistributed(classifier)(hidden)

    # Define placeholders
    labels = Input(name='labels', shape=(None,), dtype='int32', sparse=True)
    inputs_length = Input(name='inputs_length', shape=(None,), dtype='int32')

    # Define a decoder
    decoder = Lambda(decode,
                     output_shape=decode_output_shape,
                     arguments={'is_greedy': True},
                     name='decoder')
    decoded = decoder([scores, inputs_length])

    # Define loss as a layer
    ctc_layer = Lambda(ctc_lambda_func, output_shape=(1,), name="ctc")
    ctc_loss = ctc_layer([scores, labels, inputs_length])

    return Model(input=[features, labels, inputs_length],
                 output=[ctc_loss, decoded])