diff --git a/models/modules.py b/models/modules.py
index 44a0c11..ad7a518 100644
--- a/models/modules.py
+++ b/models/modules.py
@@ -8,7 +8,7 @@ def prenet(inputs, is_training, layer_sizes, scope=None):
   with tf.variable_scope(scope or 'prenet'):
     for i, size in enumerate(layer_sizes):
       dense = tf.layers.dense(x, units=size, activation=tf.nn.relu, name='dense_%d' % (i+1))
-      x = tf.layers.dropout(dense, rate=drop_rate, name='dropout_%d' % (i+1))
+      x = tf.layers.dropout(dense, rate=drop_rate, training=is_training, name='dropout_%d' % (i+1))
   return x
 
 
diff --git a/models/tacotron.py b/models/tacotron.py
index 75cd627..954b647 100644
--- a/models/tacotron.py
+++ b/models/tacotron.py
@@ -49,10 +49,13 @@ def initialize(self, inputs, input_lengths, mel_targets=None, linear_targets=Non
 
       # Attention
       attention_cell = AttentionWrapper(
-        DecoderPrenetWrapper(GRUCell(hp.attention_depth), is_training, hp.prenet_depths),
+        GRUCell(hp.attention_depth),
         BahdanauAttention(hp.attention_depth, encoder_outputs),
         alignment_history=True,
         output_attention=False)                                                  # [N, T_in, attention_depth=256]
+
+      # Apply prenet before concatenation in AttentionWrapper.
+      attention_cell = DecoderPrenetWrapper(attention_cell, is_training, hp.prenet_depths)
 
       # Concatenate attention context vector and RNN cell output into a 2*attention_depth=512D vector.
       concat_cell = ConcatOutputAndAttentionWrapper(attention_cell)              # [N, T_in, 2*attention_depth=512]
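
For context, a minimal sketch of the decoder cell stack this patch produces, assuming TF 1.x (tf.contrib.rnn / tf.contrib.seq2seq) and the repo's DecoderPrenetWrapper and ConcatOutputAndAttentionWrapper from models/rnn_wrappers.py; the build_decoder_cell helper is hypothetical and shown only to illustrate the new wrapping order.

# Sketch of the decoder cell composition after this change (assumptions as noted above).
from tensorflow.contrib.rnn import GRUCell
from tensorflow.contrib.seq2seq import AttentionWrapper, BahdanauAttention
from .rnn_wrappers import DecoderPrenetWrapper, ConcatOutputAndAttentionWrapper

def build_decoder_cell(encoder_outputs, is_training, hp):
  # Plain GRU cell inside the AttentionWrapper; the prenet no longer sits here.
  attention_cell = AttentionWrapper(
    GRUCell(hp.attention_depth),
    BahdanauAttention(hp.attention_depth, encoder_outputs),
    alignment_history=True,
    output_attention=False)

  # The prenet now runs on the raw decoder input before the AttentionWrapper
  # concatenates that input with the previous attention context.
  attention_cell = DecoderPrenetWrapper(attention_cell, is_training, hp.prenet_depths)

  # [cell output ; attention context] -> 2 * attention_depth per decoder step.
  return ConcatOutputAndAttentionWrapper(attention_cell)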