diff --git a/models/modules.py b/models/modules.py
index 44a0c11..ad7a518 100644
--- a/models/modules.py
+++ b/models/modules.py
@@ -8,7 +8,7 @@ def prenet(inputs, is_training, layer_sizes, scope=None):
   with tf.variable_scope(scope or 'prenet'):
     for i, size in enumerate(layer_sizes):
       dense = tf.layers.dense(x, units=size, activation=tf.nn.relu, name='dense_%d' % (i+1))
-      x = tf.layers.dropout(dense, rate=drop_rate, name='dropout_%d' % (i+1))
+      x = tf.layers.dropout(dense, rate=drop_rate, training=is_training, name='dropout_%d' % (i+1))
   return x
 
 
diff --git a/models/tacotron.py b/models/tacotron.py
index 75cd627..954b647 100644
--- a/models/tacotron.py
+++ b/models/tacotron.py
@@ -49,10 +49,13 @@ def initialize(self, inputs, input_lengths, mel_targets=None, linear_targets=Non
 
       # Attention
       attention_cell = AttentionWrapper(
-        DecoderPrenetWrapper(GRUCell(hp.attention_depth), is_training, hp.prenet_depths),
+        GRUCell(hp.attention_depth),
         BahdanauAttention(hp.attention_depth, encoder_outputs),
         alignment_history=True,
         output_attention=False)                                                  # [N, T_in, attention_depth=256]
+
+      # Apply prenet before concatenation in AttentionWrapper.
+      attention_cell = DecoderPrenetWrapper(attention_cell, is_training, hp.prenet_depths)
 
       # Concatenate attention context vector and RNN cell output into a 2*attention_depth=512D vector.
       concat_cell = ConcatOutputAndAttentionWrapper(attention_cell)              # [N, T_in, 2*attention_depth=512]
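
For context, a minimal sketch of the decoder cell stack this patch produces, assuming TF 1.x (tf.contrib.rnn / tf.contrib.seq2seq) and the repo's DecoderPrenetWrapper and ConcatOutputAndAttentionWrapper from models/rnn_wrappers.py; the build_decoder_cell helper is hypothetical and shown only to illustrate the new wrapping order.

# Sketch of the decoder cell composition after this change (assumptions as noted above).
from tensorflow.contrib.rnn import GRUCell
from tensorflow.contrib.seq2seq import AttentionWrapper, BahdanauAttention
from .rnn_wrappers import DecoderPrenetWrapper, ConcatOutputAndAttentionWrapper

def build_decoder_cell(encoder_outputs, is_training, hp):
  # Plain GRU cell inside the AttentionWrapper; the prenet no longer sits here.
  attention_cell = AttentionWrapper(
    GRUCell(hp.attention_depth),
    BahdanauAttention(hp.attention_depth, encoder_outputs),
    alignment_history=True,
    output_attention=False)

  # The prenet now runs on the raw decoder input before the AttentionWrapper
  # concatenates that input with the previous attention context.
  attention_cell = DecoderPrenetWrapper(attention_cell, is_training, hp.prenet_depths)

  # [cell output ; attention context] -> 2 * attention_depth per decoder step.
  return ConcatOutputAndAttentionWrapper(attention_cell)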