-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.html
377 lines (358 loc) · 20.2 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
<!DOCTYPE html>
<html lang="en">
<head>
  <!-- Declare the encoding up front: the page contains non-ASCII text (ω, ∈, curly quotes, ©). -->
  <meta charset="utf-8">
  <!-- Let the layout scale to the device width instead of rendering at desktop width on phones. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <style>
    td,
    th {
      border: 0px solid black;
    }
    img {
      padding: 5px;
    }
  </style>
  <title>It's All About Your Sketch: Democratising Sketch Control in Diffusion Models</title>
  <!-- Stylesheets: web fonts, Bulma and its plugins, icon fonts, then page-specific rules. -->
  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">
  <link rel="stylesheet" href="./static/css/bulma.min.css">
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="./static/css/index.css">
  <link rel="icon" href="./static/images/favicon.svg">
  <link rel="stylesheet" href="https://unpkg.com/image-compare-viewer/dist/image-compare-viewer.min.css">
  <link rel="stylesheet" href="css/app.css">
  <link rel="stylesheet" href="css/bootstrap.min.css">
  <!-- Scripts: image-compare-viewer and jQuery are used by the inline script at the end of the page. -->
  <script src="https://unpkg.com/image-compare-viewer/dist/image-compare-viewer.min.js"></script>
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script defer src="./static/js/fontawesome.all.min.js"></script>
  <script src="./static/js/bulma-carousel.min.js"></script>
  <script src="./static/js/bulma-slider.min.js"></script>
  <script src="./static/js/index.js"></script>
</head>
<!-- Open the body explicitly; the file already ends with a matching closing body tag. -->
<body>
<!-- Title block: paper title, author list, affiliations and resource links. -->
<section class="hero">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <h1 class="title is-1 publication-title" style="color:purple;">It's All About Your Sketch:</h1>
          <h1 class="title is-4 publication-title">Democratising Sketch Control in Diffusion Models</h1>
          <!-- Authors; the superscripts refer to the affiliations listed below. -->
          <div class="is-size-5 publication-authors">
            <span class="author-block">
              <a href="https://subhadeepkoley.github.io/">Subhadeep Koley</a><sup>1,2</sup>,</span>
            <span class="author-block">
              <a href="https://ayankumarbhunia.github.io/">Ayan Kumar Bhunia</a><sup>1</sup>,</span>
            <span class="author-block">
              <a href="https://scholar.google.com/citations?user=SoQ1vtAAAAAJ">Deeptanshu
                Sekhri</a><sup>1</sup>,</span>
            <span class="author-block">
              <a href="https://aneeshan95.github.io/">Aneeshan Sain</a><sup>1</sup>,</span>
            <span class="author-block">
              <a href="http://www.pinakinathc.me/">Pinaki Nath Chowdhury</a><sup>1</sup>,</span>
            <span class="author-block">
              <a href="https://www.surrey.ac.uk/people/tao-xiang">Tao
                Xiang</a><sup>1,2</sup>,</span>
            <span class="author-block">
              <a href="https://www.surrey.ac.uk/people/yi-zhe-song">Yi-Zhe Song</a><sup>1,2</sup></span>
          </div>
          <!-- Affiliations. -->
          <div class="is-size-5 publication-authors">
            <span class="author-block"><sup>1</sup>SketchX, CVSSP, University of Surrey, United Kingdom</span>
            <span class="author-block"><sup>2</sup>iFlyTek-Surrey Joint Research Centre on Artificial
              Intelligence</span>
          </div>
          <!-- <div class="column has-text-centered">
<a href="as">ICLR 2023</a>
</span>
</div> -->
          <div class="column has-text-centered">
            <div class="publication-links">
              <!-- PDF Link. -->
              <span class="link-block">
                <a href="https://arxiv.org/pdf/2403.07234"
                  class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fas fa-file-pdf"></i>
                  </span>
                  <span>Paper (PDF)</span>
                </a>
              </span>
              <!-- arXiv Link. -->
              <span class="link-block">
                <a href="https://arxiv.org/abs/2403.07234"
                  class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="ai ai-arxiv"></i>
                  </span>
                  <span>arXiv</span>
                </a>
              </span>
              <!-- Video Link. TODO: href is empty, so this button currently points back at this page;
                   fill in the real video URL. -->
              <span class="link-block">
                <a href="" class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fab fa-youtube"></i>
                  </span>
                  <span>Video (YouTube)</span>
                </a>
              </span>
              <!-- Poster Link. -->
              <span class="link-block">
                <a href="static/images/KOLEY_04305.pdf" class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="far fa-images"></i>
                  </span>
                  <span>Poster</span>
                </a>
              </span>
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- Teaser figure with its caption. -->
<section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">
      <!-- The alt text summarises the figure for readers who cannot see the image. -->
      <img class="round" style="width:1500px" src="./static/images/teaser.png"
        alt="Teaser figure: images generated by our method compared with SGDM, ControlNet and T2I-Adapter, a set of photos generated by our method, and results of existing methods on edgemaps versus freehand sketches">
      <h2 class="subtitle has-text-centered">
        <span class="dnerf"></span> Top-left: Comparison of images generated by our method with SGDM, ControlNet,
        and T2I-Adapter. Top-right:
        A set of photos generated by our method. Bottom: While existing methods generate realistic images from
        pixel-perfect edgemaps,
        they perform sub-optimally for freehand abstract sketches.
      </h2>
    </div>
  </div>
</section>
<!-- Abstract section. The section and its container are closed here so that the sections that
     follow are siblings of the abstract rather than being nested inside it. -->
<section class="section">
  <div class="container is-max-desktop">
    <!-- Abstract. -->
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <!-- The text is wrapped in a real paragraph; the original had only a closing tag. -->
          <p>
            This paper unravels the potential of sketches for diffusion models, addressing the deceptive promise of
            direct sketch control in generative AI. We importantly democratise the process, enabling amateur sketches
            to generate precise images, living up to the commitment of "what you sketch is what you get". A pilot
            study underscores the necessity, revealing that deformities in existing models stem from
            spatial-conditioning. To rectify this, we propose an abstraction-aware framework, utilising a sketch
            adapter, adaptive time-step sampling, and discriminative guidance from a pre-trained fine-grained
            sketch-based image retrieval model, working synergistically to reinforce fine-grained sketch-photo
            association. Our approach operates seamlessly during inference without the need for textual prompts; a
            simple, rough sketch akin to what you and I can create suffices! We welcome everyone to examine results
            presented in the paper and its supplementary. Contributions include democratising sketch control,
            introducing an abstraction-aware framework, and leveraging discriminative guidance, validated through
            extensive experiments.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- Demo video: looping, user-controlled playback of the comparison clip, followed by its caption. -->
<section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">
      <video controls loop width="1920" height="1080">
        <source src="./static/images/demo_comparison_video.mp4" type="video/mp4">
      </video>
      <h2 class="subtitle has-text-centered">
        <span class="dnerf"></span>
        <strong>Live demo comparison with T2I-Adapter and ControlNet.</strong>
      </h2>
    </div>
  </div>
</section>
<!-- <section class="hero teaser">
<div class="container is-max-desktop">
<div class="hero-body">
<iframe width="720" height="480" src="https://www.youtube.com/embed/k7xFbELpnv4?">
</iframe>
<h2 class="subtitle has-text-centered">
<span class="dnerf"></span>
</h2>
</div>
</div>
</section> -->
<!--/ Abstract. -->
<!-- Paper video. -->
<!-- Pilot study: figure and caption. -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Pilot Study</h2>
        <div class="content has-text-justified">
          <center>
            <img src="./static/images/pilot.png"
              alt="Pilot study: images generated by T2I-Adapter for different sketch-guidance factors ω between 0 and 1"
              height="600" width="2000">
          </center>
          <h5 class="subtitle has-text-centered">
            Images generated by T2I-Adapter for different sketch-guidance factors (ω ∈ [0, 1]).
            Determining the optimum ω to obtain
            an ideal balance (green-bordered) between photorealism and sketch-fidelity requires manual
            intervention and is sample-specific. A high
            value of ω works well for less deformed sketches, while the same for an abstract sketch
            produces deformed outputs and vice-versa.
          </h5>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- Architecture: training-pipeline figure and caption. -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Architecture</h2>
        <div class="content has-text-justified">
          <center>
            <img src="./static/images/arch.png" alt="Diagram of our overall training pipeline"
              height="300" width="1000">
          </center>
          <h5 class="subtitle has-text-centered">
            Our overall training pipeline.
          </h5>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- Results: a sequence of figures, each followed by its caption.
     Every caption heading is closed explicitly so the next figure is not parsed as part of the
     previous heading, and the section itself is closed so later content is not nested inside it. -->
<section class="hero">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered has-text-centered">
        <div class="column is-four-fifths">
          <h2 class="title is-3">Results</h2>
          <div class="content has-text-justified">
            <center>
              <img src="static/images/qual_1.png"
                alt="Qualitative comparison with SOTA sketch-to-image generation models on Sketchy"
                height="200" width="1500">
            </center>
            <h5 class="subtitle has-text-centered">
              Qualitative comparison with SOTA sketch-to-image generation models on Sketchy. For
              ControlNet, T2I-Adapter,
              and PITI, we use the fixed prompt “a photo of [CLASS]”, with [CLASS] replaced with
              corresponding class-labels of the input sketches.
            </h5>
            <br>
            <br>
            <center>
              <img src="static/images/qual_2.png"
                alt="Examples of generalisation across different datasets and stroke-styles"
                height="150" width="800">
            </center>
            <h5 class="subtitle has-text-centered">
              Examples showing generalisation potential across different datasets (left) and
              stroke-styles (right).
            </h5>
            <br>
            <br>
            <center>
              <img src="static/images/qual_3.png"
                alt="Illustration of cross-model generalisation to other SD variants"
                height="400" width="900">
            </center>
            <h5 class="subtitle has-text-centered">
              Illustration of cross-model generalisation. Our method trained with SD v1.5, performs
              well on other unseen SD variants (e.g., v1.4) without further fine-tuning.
            </h5>
            <br>
            <br>
            <center>
              <img src="static/images/qual_4.png"
                alt="Effect of adding noisy strokes and generation from partially-completed sketches"
                height="300" width="900">
            </center>
            <h5 class="subtitle has-text-centered">
              Examples depicting the effect of adding noisy strokes (left) and generation from
              partially-completed sketches (right).
            </h5>
            <br>
            <br>
            <center>
              <img src="static/images/qual_5.png"
                alt="Local semantic edits on input sketches transferred into output photos"
                height="300" width="900">
            </center>
            <h5 class="subtitle has-text-centered">
              Our method seamlessly transfers local semantic edits on input sketches into
              output photos.
            </h5>
            <br>
            <br>
            <center>
              <img src="static/images/qual_6.png"
                alt="Qualitative comparison with ControlNet, T2I-Adapter and PITI"
                height="600" width="900">
            </center>
            <h5 class="subtitle has-text-centered">
              Qualitative comparison with SOTAs. For ControlNet, T2I-Adapter, and PITI, we
              use the fixed prompt “a photo of [CLASS]”, with [CLASS] replaced with
              corresponding class-labels of the input sketches.
            </h5>
            <br>
            <br>
            <center>
              <img src="static/images/00028_image_grid.jpg" alt="Grid of images generated with our method"
                height="600" width="900">
            </center>
            <h5 class="subtitle has-text-centered">
              Images generated with our method.
            </h5>
            <br>
            <br>
            <center>
              <img src="static/images/00029_image_grid.jpg" alt="Grid of images generated with our method"
                height="600" width="900">
            </center>
            <h5 class="subtitle has-text-centered">
              Images generated with our method.
            </h5>
            <br>
            <br>
            <center>
              <img src="static/images/00030_image_grid.jpg" alt="Grid of images generated with our method"
                height="600" width="900">
            </center>
            <h5 class="subtitle has-text-centered">
              Images generated with our method.
            </h5>
            <br>
            <br>
            <center>
              <img src="static/images/00031_image_grid.jpg" alt="Grid of images generated with our method"
                height="600" width="900">
            </center>
            <h5 class="subtitle has-text-centered">
              Images generated with our method.
            </h5>
            <br>
            <br>
            <center>
              <img src="static/images/00032_image_grid.jpg" alt="Grid of images generated with our method"
                height="600" width="900">
            </center>
            <h5 class="subtitle has-text-centered">
              Images generated with our method.
            </h5>
            <br>
            <br>
            <center>
              <img src="static/images/00033_image_grid.jpg" alt="Grid of images generated with our method"
                height="600" width="900">
            </center>
            <h5 class="subtitle has-text-centered">
              Images generated with our method.
            </h5>
          </div>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- Citation block. The entry sits in pre/code, so its line breaks and spacing are shown verbatim. -->
<section id="BibTeX" class="section">
  <div class="container is-max-desktop content">
    <h2 class="title">BibTeX</h2>
    <pre><code>@inproceedings{koley2024handle,
title={{It's All About Your Sketch: Democratising Sketch Control in Diffusion Models}},
author={Koley, Subhadeep and Bhunia, Ayan Kumar and Sekhri, Deeptanshu and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe},
booktitle={CVPR},
year={2024}
}</code></pre>
  </div>
</section>
<script>
// Page initialisation.
//
// 1. Mount an ImageCompare widget on every ".image-compare" element (a no-op when the page has none).
// 2. Once the DOM is ready, set up the optional read-only BibTeX editor and the tooltips.
//
// Both optional steps are guarded: no <script> tag in this page's <head> loads CodeMirror or a
// jQuery tooltip plugin, so calling them unconditionally throws and aborts the ready handler.
const viewers = document.querySelectorAll(".image-compare");
viewers.forEach((element) => {
  new ImageCompare(element, {
    hoverStart: true,
    addCircle: true
  }).mount();
});
$(document).ready(function () {
  // Only build the editor when both the library and the target element are actually present.
  const bibtexArea = document.getElementById("bibtex");
  if (bibtexArea && typeof CodeMirror !== "undefined") {
    CodeMirror.fromTextArea(bibtexArea, {
      lineNumbers: false,
      lineWrapping: true,
      readOnly: true
    });
  }
  // .tooltip() exists only if a jQuery tooltip plugin has been loaded
  // (presumably Bootstrap's; only its CSS is linked here — TODO confirm).
  if (typeof $.fn.tooltip === "function") {
    $('[data-toggle="tooltip"]').tooltip();
  }
});
</script>
<!-- Page footer: badge image, licence and template credit. -->
<br>
<p style="text-align:center"> <img
    src="https://badges.toozhao.com/badges/01HTTYS41N2AZGMDRW3N6W2P09/green.svg"
    alt="Page visit counter badge"> </p>
<p style="text-align:center"> Copyright: <a href="https://creativecommons.org/licenses/by-nc-sa/4.0/"> CC
    BY-NC-SA 4.0</a> © Subhadeep Koley | Last updated: 05 April 2024 | Good artists <a
    href="https://nerfies.github.io/"> copy</a>, great artists steal.</p>
</body>
</html>