#!/usr/bin/env python
'''
Anh Nguyen <[email protected]>
2017
'''
import os, sys
os.environ['GLOG_minloglevel'] = '2' # suppress Caffe verbose prints
import settings
sys.path.insert(0, settings.caffe_root)
import caffe
import numpy as np
from numpy.linalg import norm
import scipy.misc, scipy.io
import util

class Sampler(object):

    def backward_from_x_to_h(self, generator, diff, start, end):
        '''
        Backpropagate a gradient from the image layer (end) of the generator
        network back to its latent space (start).
        '''
        dst = generator.blobs[end]
        dst.diff[...] = diff            # inject the gradient at the image layer
        generator.backward(start=end)
        g = generator.blobs[start].diff.copy()
        dst.diff.fill(0.)               # reset the objective after each step

        return g
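
    # Hypothetical usage sketch (illustration only; 'deconv0' is an assumed
    # output layer name, not taken from this method): given a gradient image
    # d_x shaped like the generator's output blob,
    #   g = sampler.backward_from_x_to_h(generator=G, diff=d_x,
    #                                    start='feat', end='deconv0')
    # returns d(objective)/dh at the latent input layer 'feat'.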

    def h_autoencoder_grad(self, h, encoder, decoder, gen_out_layer, topleft, inpainting):
        '''
        Compute the gradient of the energy of P(input) wrt input, which is given by
        decode(encode(input)) - input (see Alain & Bengio, 2014).
        Specifically, we compute E(G(h)) - h.
        Note: this is an "upside down" auto-encoder for h that goes h -> x -> h,
        with G modeling h -> x and E modeling x -> h.
        Here `encoder` is the image generator G and `decoder` is the image encoder E.
        '''
        generated = encoder.forward(feat=h)
        x = encoder.blobs[gen_out_layer].data.copy()    # 256x256

        # Crop from 256x256 to 227x227
        image_size = decoder.blobs['data'].shape        # (1, 3, 227, 227)
        cropped_x = x[:, :, topleft[0]:topleft[0]+image_size[2], topleft[1]:topleft[1]+image_size[3]]

        # Mask the image when inpainting
        if inpainting is not None:
            cropped_x = util.apply_mask(img=cropped_x, mask=inpainting['mask'], context=inpainting['image'])

        # Push this 227x227 image through the encoder net
        decoder.forward(data=cropped_x)
        code = decoder.blobs['fc6'].data

        g = code - h
        return g
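
    # Background note on the cited result: Alain & Bengio (2014) show that for
    # a denoising autoencoder r(.) trained with small Gaussian corruption of
    # std sigma, r(h) - h approximates sigma^2 * d log p(h) / dh. The vector g
    # returned above is therefore used as an (unscaled) estimate of the
    # gradient of the log-prior over codes h.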

    def sampling(self, condition_net, image_encoder, image_generator,
                 gen_in_layer, gen_out_layer, start_code,
                 n_iters, lr, lr_end, threshold,
                 layer, conditions,  # units=None, xy=0,
                 epsilon1=1, epsilon2=1, epsilon3=1e-10,
                 inpainting=None,    # in-painting args
                 output_dir=None, reset_every=0, save_every=1):

        # Get the input and output sizes
        image_shape = condition_net.blobs['data'].data.shape
        generator_output_shape = image_generator.blobs[gen_out_layer].data.shape
        encoder_input_shape = image_encoder.blobs['data'].data.shape

        # Compute the image sizes of the condition net's input,
        # the generator's output, and the encoder's input
        image_size = util.get_image_size(image_shape)
        generator_output_size = util.get_image_size(generator_output_shape)
        encoder_input_size = util.get_image_size(encoder_input_shape)

        # The top-left offsets for cropping the generator output down to 227x227
        topleft = util.compute_topleft(image_size, generator_output_size)
        topleft_DAE = util.compute_topleft(encoder_input_size, generator_output_size)

        src = image_generator.blobs[gen_in_layer]   # the input feature layer of the generator

        # Make sure the layer size and the initial vector size match
        assert src.data.shape == start_code.shape

        # Variables to store the most recent sample
        last_xx = np.zeros(image_shape)    # last image
        last_prob = -sys.maxint            # its probability

        h = start_code.copy()

        condition_idx = 0
        list_samples = []
        i = 0

        # Sampling loop
        while True:

            step_size = lr + ((lr_end - lr) * i) / n_iters
            condition = conditions[condition_idx]   # select a class

            # 1. Compute the epsilon1 term ---
            # the gradient d log(p(h)) / dh per the DAE result in Alain & Bengio (2014)
            d_prior = self.h_autoencoder_grad(h=h, encoder=image_generator, decoder=image_encoder, gen_out_layer=gen_out_layer, topleft=topleft_DAE, inpainting=inpainting)

            # 2. Compute the epsilon2 term ---
            # Push the code h through the generator to get an image x
            image_generator.blobs["feat"].data[:] = h
            generated = image_generator.forward()
            x = generated[gen_out_layer].copy()     # 256x256

            # Crop from 256x256 to 227x227
            cropped_x = x[:, :, topleft[0]:topleft[0]+image_size[0], topleft[1]:topleft[1]+image_size[1]]
            cropped_x_copy = cropped_x.copy()

            if inpainting is not None:
                cropped_x = util.apply_mask(img=cropped_x, mask=inpainting['mask'], context=inpainting['image'])

            # Forward the image x through the condition net up to a unit k at the
            # given layer, then backprop the gradient through the condition net
            # back to the image layer to get a gradient image
            d_condition_x, prob, info = self.forward_backward_from_x_to_condition(net=condition_net, end=layer, image=cropped_x, condition=condition)

            if inpainting is not None:
                # Mask out the class gradient image
                d_condition_x[:] *= inpainting["mask"]

                # An additional objective for matching the context image
                d_context_x256 = np.zeros_like(x)
                d_context_x256[:, :, topleft[0]:topleft[0]+image_size[0], topleft[1]:topleft[1]+image_size[1]] = (inpainting["image"] - cropped_x_copy) * inpainting["mask_neg"]
                d_context_h = self.backward_from_x_to_h(generator=image_generator, diff=d_context_x256, start=gen_in_layer, end=gen_out_layer)

            # Put the gradient back into the 256x256 format
            d_condition_x256 = np.zeros_like(x)
            d_condition_x256[:, :, topleft[0]:topleft[0]+image_size[0], topleft[1]:topleft[1]+image_size[1]] = d_condition_x.copy()

            # Backpropagate the above gradient all the way to h (through the generator).
            # This gradient 'd_condition' is d log(p(y|h)) / dh (the epsilon2 term in Eq. 11 of the paper).
            d_condition = self.backward_from_x_to_h(generator=image_generator, diff=d_condition_x256, start=gen_in_layer, end=gen_out_layer)

            self.print_progress(i, info, condition, prob, d_condition)

            # 3. Compute the epsilon3 term ---
            noise = np.zeros_like(h)
            if epsilon3 > 0:
                noise = np.random.normal(0, epsilon3, h.shape)   # Gaussian noise
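
            # For reference, Eq. 11 of the PPGN paper (Nguyen et al. 2017)
            # gives the update implemented below as roughly
            #   h_{t+1} = h_t + epsilon1 * d log p(h)/dh
            #                 + epsilon2 * d log p(y|h)/dh
            #                 + N(0, epsilon3^2)
            # where d_prior and d_condition estimate the two gradient terms.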
            # Update h according to Eq. 11 in the paper
            d_h = epsilon1 * d_prior + epsilon2 * d_condition + noise

            # Plus the optional epsilon4 term for matching the context region when in-painting
            if inpainting is not None:
                d_h += inpainting["epsilon4"] * d_context_h

            h += step_size / np.abs(d_h).mean() * d_h

            h = np.clip(h, a_min=0, a_max=30)   # keep the code within a realistic range

            # Reset the code every N iters (for diversity when running a long sampling chain)
            if reset_every > 0 and i % reset_every == 0 and i > 0:
                h = np.random.normal(0, 1, h.shape)

                # Experimental: for sample diversity, randomly pick epsilon1 as well
                epsilon1 = np.random.uniform(low=1e-6, high=1e-2)

            # Save the current sample
            last_xx = cropped_x.copy()
            last_prob = prob

            # Keep samples that pass the threshold, one every `save_every` iterations
            if save_every > 0 and i % save_every == 0 and prob > threshold:
                name = "%s/samples/%05d.jpg" % (output_dir, i)
                label = self.get_label(condition)
                list_samples.append((last_xx.copy(), name, label))

            # Stop if the gradient is 0
            if norm(d_h) == 0:
                print " d_h is 0"
                break

            # Move on to the next condition (class) every n_iters iterations
            if i > 0 and i % n_iters == 0:
                condition_idx += 1

                if condition_idx == len(conditions):
                    break

            i += 1  # next iteration

        # Return the last sample
        print "-------------------------"
        print "Last sample: prob [%s] " % last_prob

        return last_xx, list_samples
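

# Hypothetical usage sketch (illustration only; the net objects, layer names,
# and hyperparameter values below are assumptions, not taken from this file;
# in the PPGN code the sampler is driven by separate sampling scripts):
#
#   sampler = Sampler()
#   last_img, samples = sampler.sampling(
#       condition_net=classifier_net,      # p(y|x), e.g. an image classifier
#       image_encoder=encoder_net,         # E: x -> h
#       image_generator=generator_net,     # G: h -> x
#       gen_in_layer='feat', gen_out_layer='deconv0',
#       start_code=np.random.normal(0, 1, generator_net.blobs['feat'].data.shape),
#       n_iters=200, lr=1e-2, lr_end=1e-10, threshold=0.9,
#       layer='fc8', conditions=my_conditions,  # structure consumed by forward_backward_from_x_to_condition
#       epsilon1=1e-5, epsilon2=1., epsilon3=1e-17,
#       output_dir='output', save_every=1)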