added examples

neural-nuts · May 3, 2017 · 1ba87d2 · 1ba87d2
1 parent 451b92a
commit 1ba87d2
Show file tree

Hide file tree

Showing 34 changed files with 4,742 additions and 517 deletions.
diff --git a/Demo.ipynb b/Demo.ipynb
diff --git a/Images/gen_102617084.jpg b/Images/gen_102617084.jpg
diff --git a/Images/gen_2230458748.jpg b/Images/gen_2230458748.jpg
diff --git a/Images/gen_2461372011.jpg b/Images/gen_2461372011.jpg
diff --git a/Images/gen_2472980433.jpg b/Images/gen_2472980433.jpg
diff --git a/Images/gen_2537697530.jpg b/Images/gen_2537697530.jpg
diff --git a/Images/gen_283252248.jpg b/Images/gen_283252248.jpg
diff --git a/Images/gen_3126981064.jpg b/Images/gen_3126981064.jpg
diff --git a/Images/gen_3273757324.jpg b/Images/gen_3273757324.jpg
diff --git a/Images/gen_3920626767.jpg b/Images/gen_3920626767.jpg
diff --git a/Images/gen_4013421575.jpg b/Images/gen_4013421575.jpg
diff --git a/Images/gen_4752984291.jpg b/Images/gen_4752984291.jpg
diff --git a/Images/gen_532999240.jpg b/Images/gen_532999240.jpg
diff --git a/Images/gen_7125476937.jpg b/Images/gen_7125476937.jpg
diff --git a/Images/gen_7148046575.jpg b/Images/gen_7148046575.jpg
diff --git a/Images/gen_7526599338.jpg b/Images/gen_7526599338.jpg
diff --git a/README.md b/README.md
@@ -46,3 +46,20 @@ Show and Tell: A Neural Image Caption Generator
 
 ## License:
 Protected Under BSD-3 Clause License.
+
+## Some Examples:
+![Alt text](/Images/gen_3126981064.jpg)
+![Alt text](/Images/gen_7148046575.jpg)
+![Alt text](/Images/gen_7526599338.jpg)
+![Alt text](/Images/gen_4013421575.jpg)
+![Alt text](/Images/gen_3273757324.jpg)
+![Alt text](/Images/gen_102617084.jpg)
+![Alt text](/Images/gen_2230458748.jpg)
+![Alt text](/Images/gen_7125476937.jpg)
+![Alt text](/Images/gen_532999240.jpg)
+![Alt text](/Images/gen_4752984291.jpg)
+![Alt text](/Images/gen_283252248.jpg)
+![Alt text](/Images/gen_3920626767.jpg)
+![Alt text](/Images/gen_2472980433.jpg)
+![Alt text](/Images/gen_2537697530.jpg)
+![Alt text](/Images/gen_2461372011.jpg)
diff --git a/caption_generator.py b/caption_generator.py
@@ -1,6 +1,8 @@
+import matplotlib.pyplot as plt
 from random import shuffle
 from convfeatures import *
 import tensorflow as tf
+from PIL import Image
 import numpy as np
 import pickle
 import sys
@@ -57,6 +59,7 @@ def __init__(self, config, data=None):
             self.idxtow = dict(zip(self.wtoidx.values(), self.wtoidx.keys()))
             self.model()
             self.image_features, self.IDs = self.build_decode_graph()
+            self.load_image=config.load_image
             if not self.batch_decode:
                 self.io = build_prepro_graph()
                 self.sess = self.init_decode()
@@ -226,6 +229,7 @@ def build_decode_graph(self):
                 pred_ID = tf.nn.embedding_lookup(
                     self.word_embedding['weights'], predicted_next_idx)
                 pred_ID = pred_ID + self.word_embedding['biases']
+                predicted_next_idx = tf.cast(predicted_next_idx, tf.int32, name="word_"+str(i))
                 IDs.append(predicted_next_idx)
 
         with open("model/Decoder/DecoderOutputs.txt", 'w') as f:
@@ -254,7 +258,6 @@ def train(self, loss, inp_dict):
             print "Initializing Training"
             init = tf.global_variables_initializer()
             sess.run(init)
-
             if self.resume is 1:
                 print "Loading Previously Trained Model"
                 print self.current_epoch, "Out of", self.nb_epochs, "Completed in previous run."
@@ -268,7 +271,9 @@ def train(self, loss, inp_dict):
                     sys.exit(0)
             writer = tf.summary.FileWriter(
                 "model/log_dir/", graph=tf.get_default_graph())
+
             for epoch in range(self.current_epoch, self.nb_epochs):
+                loss=[]
                 idx = np.random.permutation(self.features.shape[0])
                 self.captions = self.captions[idx]
                 self.features = self.features[idx]
@@ -284,16 +289,16 @@ def train(self, loss, inp_dict):
                     writer.add_summary(summary, step)
                     if step % 100 == 0:
                         print epoch, ": Global Step:", step, "\tLoss: ", current_loss
-
+                    loss.append(current_loss)
                 print
-                print "Epoch: ", epoch, "\tCurrent Loss: ", current_loss
+                print "Epoch: ", epoch, "\tAverage Loss: ", np.mean(loss)
                 print "\nSaving Model..\n"
                 saver.save(sess, "./model/model.ckpt", global_step=global_step)
                 np.save("model/save", (epoch, step))
 
     def init_decode(self):
         saver = tf.train.Saver()
-        ckpt_file = "./model/model.ckpt-" + str(self.current_step)
+        ckpt_file = "./model/model.ckpt-" + str(self.current_step) #str(89994)
         sess = tf.Session()
         init = tf.global_variables_initializer()
         sess.run(init)
@@ -307,30 +312,40 @@ def decode(self, path):
                 self.image_features: features})
         sentence = " ".join(self.IDs_to_Words(self.idxtow, caption_IDs))
         sentence = sentence.split("</S>")[0]
-        print "Caption:", sentence
-        print
+        if self.load_image:
+            plt.imshow(Image.open(path))
+            plt.axis("off")
+            plt.title(sentence, fontsize='10', loc='left')
+            name=path.split("/")[-1]
+            plt.savefig("./results/"+"gen_"+name)
+            plt.show()
+        else:
+            print sentence
         if self.savedecoder:
             saver = tf.train.Saver()
             saver.save(self.sess, "model/Decoder/model.ckpt")
 
-    def batch_decode(self, features):
+        #return path, sentence
+
+    def batch_decoder(self, filenames, features):
         saver = tf.train.Saver()
         ckpt_file = "./model/model.ckpt-" + str(self.current_step)
         sentences = []
-        with tf.Session() as sess:
-            init = tf.global_variables_initializer()
-            sess.run(init)
-            saver.restore(sess, ckpt_file)
-            for i, feat in enumerate(features[:100]):
-                feat = np.reshape(feat, newshape=(1, 1536))
-                caption_IDs = sess.run(
-                    self.IDs, feed_dict={
-                        self.image_features: feat})
-                sentence = " ".join(
-                    self.IDs_to_Words(
-                        self.idxtow, caption_IDs))
-                sentence = sentence.split("</S>")[0]
-                sentences.append(sentence)
-                if i % 1000 == 0:
-                    print "Progress", i, "out of", features.shape[0]
-        return sentences
+        filenames = np.unique(filenames)
+        with open("model/Decoder/Generated_Captions.txt", 'w') as f:
+            with tf.Session() as sess:
+                init = tf.global_variables_initializer()
+                sess.run(init)
+                saver.restore(sess, ckpt_file)
+                for i, feat in enumerate(features):
+                    feat = np.reshape(feat, newshape=(1, 1536))
+                    caption_IDs = sess.run(
+                        self.IDs, feed_dict={
+                            self.image_features: feat})
+                    sentence = " ".join(
+                        self.IDs_to_Words(
+                            self.idxtow, caption_IDs))
+                    sentence = sentence.split("</S>")[0]
+                    if i % 1000 == 0:
+                        print "Progress", i, "out of", features.shape[0]
+                    f.write(filenames[i] + "\t" + sentence + "\n")
diff --git a/configuration.py b/configuration.py
@@ -18,5 +18,6 @@ def __init__(self, args):
         self.batch_decode = False
         self.mode = args["mode"]
         self.resume = args["resume"]
+        self.load_image = args.get("load_image")
         self.saveencoder = bool(args["saveencoder"])
         self.savedecoder = bool(args["savedecoder"])
diff --git a/convfeatures.py b/convfeatures.py
@@ -2,12 +2,13 @@
 import numpy as np
 import os
 
-
-img_path = "Dataset/flickr30k-images/"
-files = sorted(np.array(os.listdir("Dataset/flickr30k-images/")))
-
 batch_size = 10
-n_batch = len(files) / batch_size
+img_path = "Dataset/flickr30k-images/"
+try:
+    files = sorted(np.array(os.listdir("Dataset/flickr30k-images/")))
+    n_batch = len(files) / batch_size
+except:
+    pass
 
 with open('ConvNets/inception_v4.pb', 'rb') as f:
     fileContent = f.read()
@@ -22,6 +23,7 @@
     "import/InceptionV4/Logits/AvgPool_1a/AvgPool:0")
 
 
+
 '''
 OLD PRE-PROCESSING MODULES : SLOW
 import cv2
@@ -53,7 +55,7 @@ def old_load_image(x, new_h=299, new_w=299):
 
 
 def build_prepro_graph():
-    input_file = tf.placeholder(dtype=tf.string, name="InputImage")
+    input_file = tf.placeholder(dtype=tf.string, name="InputFile")
     image_file = tf.read_file(input_file)
     jpg = tf.image.decode_jpeg(image_file, channels=3)
     png = tf.image.decode_png(image_file, channels=3)
@@ -82,8 +84,8 @@ def load_next_batch(sess, io):
         batch = batch.reshape((batch_size, 299, 299, 3))
         yield batch
 
-
 def forward_pass(io):
+    global output_layer
     with tf.Session() as sess:
         init = tf.global_variables_initializer()
         sess.run(init)
@@ -114,6 +116,8 @@ def forward_pass(io):
 
 
 def get_features(sess, io, img, saveencoder=False):
+    global output_layer
+    output_layer = tf.reshape(output_layer, [1,1536], name="Output_Features")
     image = load_image(sess, io, img)
     feed_dict = {input_layer: image}
     prob = sess.run(output_layer, feed_dict=feed_dict)
@@ -125,7 +129,7 @@ def get_features(sess, io, img, saveencoder=False):
                 f.write(t + "\n")
         saver = tf.train.Saver()
         saver.save(sess, "model/Encoder/model.ckpt")
-    return prob[0][0]
+    return prob
 
 if __name__ == "__main__":
     print "#Images:", len(files)

diff --git a/eval.py b/eval.py
@@ -0,0 +1,72 @@
+import nltk
+import pandas as pd
+import numpy as np
+
+
+path_to_reference = 'Dataset/Validation_Captions.txt' # df -> image_id:str     caption:str     len(5000)
+path_to_model = 'model/Decoder/Generated_Captions.txt'
+
+with open(path_to_model) as f:
+  model_data = f.readlines()
+model_filenames=[caps.split('\t')[0] for caps in model_data]
+model_captions = [caps.replace('\n', '').split('\t')[1] for caps in model_data]
+
+with open(path_to_reference, 'r') as f:
+    ref_data = f.readlines()
+reference_filenames = [caps.split('\t')[0].split('#')[0] for caps in ref_data]
+reference_captions = [caps.replace('\n', '').split('\t')[1] for caps in ref_data]
+
+df = pd.DataFrame()
+df['image'] = reference_filenames
+df['caption'] = reference_captions
+df.caption = df.caption.str.decode('utf').str.split()
+df = pd.DataFrame(data = {'image':list(df.image.unique()),'caption':list(df.groupby('image')['caption'].apply(list))})[:len(model_captions)]
+
+bleu1_scores = []
+bleu2_scores = []
+bleu3_scores = []
+bleu4_scores = []
+index1=None
+index2=None
+
+for i, row in df.iterrows():
+  model = model_captions[i].split()
+  reference = row.caption
+  try:
+      score1 = nltk.translate.bleu_score.sentence_bleu(reference, model, weights=[1.0])
+      score2 = nltk.translate.bleu_score.sentence_bleu(reference, model, weights=[0.5,0.5])
+      score3 = nltk.translate.bleu_score.sentence_bleu(reference, model, weights=[1.0/3,1.0/3,1-2*(1.0/3)])
+      score4 = nltk.translate.bleu_score.sentence_bleu(reference, model)
+      bleu1_scores.append(score1)
+      bleu2_scores.append(score2)
+      bleu3_scores.append(score3)
+      bleu4_scores.append(score4)
+      if i%10000 == 0 and i!=0:
+          print (float(i)/df.shape[0])*100,"%"," done"
+  except:
+      index1=df.index[i]
+      index2=i
+      print "Invalid Caption Generated for: ", model_filenames[i]
+
+print "\nMean Sentence-Level BLEU-1 score: ", np.mean(bleu1_scores)
+print "Mean Sentence-Level BLEU-2 score: ", np.mean(bleu2_scores)
+print "Mean Sentence-Level BLEU-3 score: ", np.mean(bleu3_scores)
+print "Mean Sentence-Level BLEU-4 score: ", np.mean(bleu4_scores)
+
+
+if index1 and index2:
+    df=df.drop([index1])
+    df=df.reset_index(drop=True)
+    del model_captions[index2]
+
+references=df.caption
+model_captions = [caption.split() for caption in model_captions]
+
+score1 = nltk.translate.bleu_score.corpus_bleu(references,model_captions, weights=[1.0])
+print "\n\nCorpus-Level BLEU-1 score: ", score1
+score2 = nltk.translate.bleu_score.corpus_bleu(references,model_captions, weights=[0.5,0.5])
+print "Corpus-Level BLEU-2 score: ", score2
+score3 = nltk.translate.bleu_score.corpus_bleu(references,model_captions, weights=[1.0/3,1.0/3,1-2*(1.0/3)])
+print "Corpus-Level BLEU-3 score: ", score3
+score4 = nltk.translate.bleu_score.corpus_bleu(references,model_captions)
+print "Corpus-Level BLEU-4 score: ", score4