Skip to content
This repository has been archived by the owner on Feb 17, 2022. It is now read-only.

Commit

Permalink
added examples
Browse files Browse the repository at this point in the history
  • Loading branch information
pranay360 committed May 3, 2017
1 parent 451b92a commit 1ba87d2
Show file tree
Hide file tree
Showing 34 changed files with 4,742 additions and 517 deletions.
302 changes: 302 additions & 0 deletions Demo.ipynb

Large diffs are not rendered by default.

Binary file added Images/gen_102617084.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/gen_2230458748.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/gen_2461372011.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/gen_2472980433.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/gen_2537697530.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/gen_283252248.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/gen_3126981064.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/gen_3273757324.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/gen_3920626767.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/gen_4013421575.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/gen_4752984291.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/gen_532999240.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/gen_7125476937.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/gen_7148046575.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/gen_7526599338.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,20 @@ Show and Tell: A Neural Image Caption Generator

## License:
Licensed under the BSD 3-Clause License.

## Some Examples:
![Alt text](/Images/gen_3126981064.jpg)
![Alt text](/Images/gen_7148046575.jpg)
![Alt text](/Images/gen_7526599338.jpg)
![Alt text](/Images/gen_4013421575.jpg)
![Alt text](/Images/gen_3273757324.jpg)
![Alt text](/Images/gen_102617084.jpg)
![Alt text](/Images/gen_2230458748.jpg)
![Alt text](/Images/gen_7125476937.jpg)
![Alt text](/Images/gen_532999240.jpg)
![Alt text](/Images/gen_4752984291.jpg)
![Alt text](/Images/gen_283252248.jpg)
![Alt text](/Images/gen_3920626767.jpg)
![Alt text](/Images/gen_2472980433.jpg)
![Alt text](/Images/gen_2537697530.jpg)
![Alt text](/Images/gen_2461372011.jpg)
63 changes: 39 additions & 24 deletions caption_generator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import matplotlib.pyplot as plt
from random import shuffle
from convfeatures import *
import tensorflow as tf
from PIL import Image
import numpy as np
import pickle
import sys
Expand Down Expand Up @@ -57,6 +59,7 @@ def __init__(self, config, data=None):
self.idxtow = dict(zip(self.wtoidx.values(), self.wtoidx.keys()))
self.model()
self.image_features, self.IDs = self.build_decode_graph()
self.load_image=config.load_image
if not self.batch_decode:
self.io = build_prepro_graph()
self.sess = self.init_decode()
Expand Down Expand Up @@ -226,6 +229,7 @@ def build_decode_graph(self):
pred_ID = tf.nn.embedding_lookup(
self.word_embedding['weights'], predicted_next_idx)
pred_ID = pred_ID + self.word_embedding['biases']
predicted_next_idx = tf.cast(predicted_next_idx, tf.int32, name="word_"+str(i))
IDs.append(predicted_next_idx)

with open("model/Decoder/DecoderOutputs.txt", 'w') as f:
Expand Down Expand Up @@ -254,7 +258,6 @@ def train(self, loss, inp_dict):
print "Initializing Training"
init = tf.global_variables_initializer()
sess.run(init)

if self.resume is 1:
print "Loading Previously Trained Model"
print self.current_epoch, "Out of", self.nb_epochs, "Completed in previous run."
Expand All @@ -268,7 +271,9 @@ def train(self, loss, inp_dict):
sys.exit(0)
writer = tf.summary.FileWriter(
"model/log_dir/", graph=tf.get_default_graph())

for epoch in range(self.current_epoch, self.nb_epochs):
loss=[]
idx = np.random.permutation(self.features.shape[0])
self.captions = self.captions[idx]
self.features = self.features[idx]
Expand All @@ -284,16 +289,16 @@ def train(self, loss, inp_dict):
writer.add_summary(summary, step)
if step % 100 == 0:
print epoch, ": Global Step:", step, "\tLoss: ", current_loss

loss.append(current_loss)
print
print "Epoch: ", epoch, "\tCurrent Loss: ", current_loss
print "Epoch: ", epoch, "\tAverage Loss: ", np.mean(loss)
print "\nSaving Model..\n"
saver.save(sess, "./model/model.ckpt", global_step=global_step)
np.save("model/save", (epoch, step))

def init_decode(self):
saver = tf.train.Saver()
ckpt_file = "./model/model.ckpt-" + str(self.current_step)
ckpt_file = "./model/model.ckpt-" + str(self.current_step) #str(89994)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
Expand All @@ -307,30 +312,40 @@ def decode(self, path):
self.image_features: features})
sentence = " ".join(self.IDs_to_Words(self.idxtow, caption_IDs))
sentence = sentence.split("</S>")[0]
print "Caption:", sentence
print
if self.load_image:
plt.imshow(Image.open(path))
plt.axis("off")
plt.title(sentence, fontsize='10', loc='left')
name=path.split("/")[-1]
plt.savefig("./results/"+"gen_"+name)
plt.show()
else:
print sentence
if self.savedecoder:
saver = tf.train.Saver()
saver.save(self.sess, "model/Decoder/model.ckpt")

def batch_decode(self, features):
#return path, sentence

def batch_decoder(self, filenames, features):
saver = tf.train.Saver()
ckpt_file = "./model/model.ckpt-" + str(self.current_step)
sentences = []
with tf.Session() as sess:
init = tf.global_variables_initializer()
sess.run(init)
saver.restore(sess, ckpt_file)
for i, feat in enumerate(features[:100]):
feat = np.reshape(feat, newshape=(1, 1536))
caption_IDs = sess.run(
self.IDs, feed_dict={
self.image_features: feat})
sentence = " ".join(
self.IDs_to_Words(
self.idxtow, caption_IDs))
sentence = sentence.split("</S>")[0]
sentences.append(sentence)
if i % 1000 == 0:
print "Progress", i, "out of", features.shape[0]
return sentences
filenames = np.unique(filenames)
with open("model/Decoder/Generated_Captions.txt", 'w') as f:
with tf.Session() as sess:
init = tf.global_variables_initializer()
sess.run(init)
saver.restore(sess, ckpt_file)
for i, feat in enumerate(features):
feat = np.reshape(feat, newshape=(1, 1536))
caption_IDs = sess.run(
self.IDs, feed_dict={
self.image_features: feat})
sentence = " ".join(
self.IDs_to_Words(
self.idxtow, caption_IDs))
sentence = sentence.split("</S>")[0]
if i % 1000 == 0:
print "Progress", i, "out of", features.shape[0]
f.write(filenames[i] + "\t" + sentence + "\n")
1 change: 1 addition & 0 deletions configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ def __init__(self, args):
self.batch_decode = False
self.mode = args["mode"]
self.resume = args["resume"]
self.load_image = args.get("load_image")
self.saveencoder = bool(args["saveencoder"])
self.savedecoder = bool(args["savedecoder"])
20 changes: 12 additions & 8 deletions convfeatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
import numpy as np
import os


img_path = "Dataset/flickr30k-images/"
files = sorted(np.array(os.listdir("Dataset/flickr30k-images/")))

batch_size = 10
n_batch = len(files) / batch_size
img_path = "Dataset/flickr30k-images/"
try:
files = sorted(np.array(os.listdir("Dataset/flickr30k-images/")))
n_batch = len(files) / batch_size
except:
pass

with open('ConvNets/inception_v4.pb', 'rb') as f:
fileContent = f.read()
Expand All @@ -22,6 +23,7 @@
"import/InceptionV4/Logits/AvgPool_1a/AvgPool:0")



'''
OLD PRE-PROCESSING MODULES : SLOW
import cv2
Expand Down Expand Up @@ -53,7 +55,7 @@ def old_load_image(x, new_h=299, new_w=299):


def build_prepro_graph():
input_file = tf.placeholder(dtype=tf.string, name="InputImage")
input_file = tf.placeholder(dtype=tf.string, name="InputFile")
image_file = tf.read_file(input_file)
jpg = tf.image.decode_jpeg(image_file, channels=3)
png = tf.image.decode_png(image_file, channels=3)
Expand Down Expand Up @@ -82,8 +84,8 @@ def load_next_batch(sess, io):
batch = batch.reshape((batch_size, 299, 299, 3))
yield batch


def forward_pass(io):
global output_layer
with tf.Session() as sess:
init = tf.global_variables_initializer()
sess.run(init)
Expand Down Expand Up @@ -114,6 +116,8 @@ def forward_pass(io):


def get_features(sess, io, img, saveencoder=False):
global output_layer
output_layer = tf.reshape(output_layer, [1,1536], name="Output_Features")
image = load_image(sess, io, img)
feed_dict = {input_layer: image}
prob = sess.run(output_layer, feed_dict=feed_dict)
Expand All @@ -125,7 +129,7 @@ def get_features(sess, io, img, saveencoder=False):
f.write(t + "\n")
saver = tf.train.Saver()
saver.save(sess, "model/Encoder/model.ckpt")
return prob[0][0]
return prob

if __name__ == "__main__":
print "#Images:", len(files)
Expand Down
72 changes: 72 additions & 0 deletions eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import nltk
import pandas as pd
import numpy as np


# Evaluation script: scores generated captions against reference captions
# with sentence-level and corpus-level BLEU-1..4.
#
# Both input files hold one "<image>\t<caption>" record per line.  In the
# reference file the image field is cut at the first '#', and all captions
# sharing the resulting image name are grouped as references for that image.
path_to_reference = 'Dataset/Validation_Captions.txt'
path_to_model = 'model/Decoder/Generated_Captions.txt'

# (label, n-gram weights) for every BLEU order that is reported.  The BLEU-4
# weights are spelled out; they equal NLTK's default weights.
BLEU_VARIANTS = [
    ("BLEU-1", [1.0]),
    ("BLEU-2", [0.5, 0.5]),
    ("BLEU-3", [1.0 / 3, 1.0 / 3, 1 - 2 * (1.0 / 3)]),
    ("BLEU-4", [0.25, 0.25, 0.25, 0.25]),
]


def read_captions(path, strip_suffix=False):
    """Read a tab-separated caption file.

    Args:
        path: File with one "<image>\\t<caption>" record per line.
        strip_suffix: When true, keep only the part of the image field that
            precedes the first '#'.

    Returns:
        A ``(filenames, captions)`` pair of equally long lists, in file order.

    Raises:
        ValueError: If a non-blank line has no tab-separated caption field.
    """
    filenames = []
    captions = []
    with open(path, 'r') as f:
        for line_no, line in enumerate(f, 1):
            line = line.replace('\n', '')
            if not line.strip():
                # A blank (typically trailing) line carries no record.
                continue
            fields = line.split('\t')
            if len(fields) < 2:
                raise ValueError(
                    "%s:%d: expected '<image>\\t<caption>'" % (path, line_no))
            name = fields[0]
            if strip_suffix:
                name = name.split('#')[0]
            filenames.append(name)
            captions.append(fields[1])
    return filenames, captions


def tokenize_caption(caption):
    """Split a caption on whitespace, decoding byte strings as UTF-8 first."""
    if isinstance(caption, bytes):
        caption = caption.decode('utf-8')
    return caption.split()


def group_references(filenames, captions):
    """Group tokenized captions by image name.

    Returns:
        ``(images, references)`` where ``images`` is the sorted list of
        distinct image names and ``references[k]`` is the list of token lists
        belonging to ``images[k]``.  Both lists share one ordering, so a name
        can never be paired with another image's captions.
    """
    grouped = {}
    for name, caption in zip(filenames, captions):
        grouped.setdefault(name, []).append(tokenize_caption(caption))
    images = sorted(grouped)
    return images, [grouped[name] for name in images]


def drop_indices(items, indices):
    """Return ``items`` without the elements at the given positions."""
    skipped = set(indices)
    return [item for i, item in enumerate(items) if i not in skipped]


def main():
    """Print mean sentence-level and corpus-level BLEU-1..4 scores."""
    bleu = nltk.translate.bleu_score

    model_filenames, model_captions = read_captions(path_to_model)
    reference_filenames, reference_captions = read_captions(
        path_to_reference, strip_suffix=True)

    # Hypotheses are matched to references by position, against images in
    # sorted order.
    # NOTE(review): assumes the generated-captions file lists images in that
    # same sorted order -- confirm against the code that writes it.
    _, references = group_references(reference_filenames, reference_captions)
    hypotheses = [tokenize_caption(caption) for caption in model_captions]

    # Score only the pairs both sides can provide; corpus_bleu requires the
    # two sequences to have equal length.
    total = min(len(references), len(hypotheses))
    references = references[:total]
    hypotheses = hypotheses[:total]

    sentence_scores = [[] for _ in BLEU_VARIANTS]
    invalid = []
    for i, (reference, hypothesis) in enumerate(zip(references, hypotheses)):
        try:
            scores = [
                bleu.sentence_bleu(reference, hypothesis, weights=weights)
                for _, weights in BLEU_VARIANTS
            ]
        except Exception:
            # Best effort: report the caption that could not be scored and
            # remember it so the corpus-level scores exclude it as well.
            invalid.append(i)
            print("%s %s" % ("Invalid Caption Generated for: ",
                             model_filenames[i]))
            continue
        for bucket, score in zip(sentence_scores, scores):
            bucket.append(score)
        if i % 10000 == 0 and i != 0:
            print("%s %s %s" % ((float(i) / total) * 100, "%", " done"))

    prefix = "\n"
    for (label, _), bucket in zip(BLEU_VARIANTS, sentence_scores):
        mean = np.mean(bucket) if bucket else float('nan')
        print("%sMean Sentence-Level %s score:  %s" % (prefix, label, mean))
        prefix = ""

    # Remove every caption that failed above, including one at index 0.
    references = drop_indices(references, invalid)
    hypotheses = drop_indices(hypotheses, invalid)

    prefix = "\n\n"
    for label, weights in BLEU_VARIANTS:
        score = bleu.corpus_bleu(references, hypotheses, weights=weights)
        print("%sCorpus-Level %s score:  %s" % (prefix, label, score))
        prefix = ""


if __name__ == "__main__":
    main()
Loading

0 comments on commit 1ba87d2

Please sign in to comment.