Skip to content

Commit

Permalink
Enhance PowerPaint (#2093)
Browse files Browse the repository at this point in the history
* Update gradio_PowerPaint.py

* Update gradio_PowerPaint.py

* Update gradio_PowerPaint.py

* Update gradio_PowerPaint.py

* Update README.md

* Update README.md

* Update gradio_PowerPaint.py

* Update gradio_PowerPaint.py

* Update README.md
  • Loading branch information
zhuang2002 authored Dec 18, 2023
1 parent ab610dd commit b08b6ca
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 73 deletions.
12 changes: 12 additions & 0 deletions projects/powerpaint/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,20 @@

This README provides a step-by-step guide to downloading the repository, setting up the required virtual environment named "PowerPaint" using conda, and running PowerPaint with or without ControlNet.

## News

**December 18, 2023**

*Enhanced PowerPaint Model*

- We are delighted to announce the release of more stable model weights. These refined weights can now be accessed on [Hugging Face](https://huggingface.co/JunhaoZhuang/PowerPaint-v1/tree/main). The `gradio_PowerPaint.py` file and [Online Demo](https://openxlab.org.cn/apps/detail/rangoliu/PowerPaint) have also been updated as part of this release.


## Next

**Stronger Model Weights Coming Soon!**

________________
<img src='https://github.com/open-mmlab/mmagic/assets/12782558/acd01391-c73f-4997-aafd-0869aebcc915'/>

## Getting Started
Expand Down
150 changes: 77 additions & 73 deletions projects/powerpaint/gradio_PowerPaint.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
StableDiffusionInpaintPipeline as Pipeline
from pipeline.pipeline_PowerPaint_ControlNet import \
StableDiffusionControlNetInpaintPipeline as controlnetPipeline
from safetensors.torch import load_file
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
from utils.utils import TokenizerWrapper, add_tokens

Expand All @@ -21,8 +20,7 @@
global pipe
pipe = Pipeline.from_pretrained(
'runwayml/stable-diffusion-inpainting',
torch_dtype=weight_dtype,
safety_checker=None)
torch_dtype=weight_dtype)
pipe.tokenizer = TokenizerWrapper(
from_pretrained='runwayml/stable-diffusion-v1-5',
subfolder='tokenizer',
Expand All @@ -34,14 +32,13 @@
placeholder_tokens=['P_ctxt', 'P_shape', 'P_obj'],
initialize_tokens=['a', 'a', 'a'],
num_vectors_per_token=10)
pipe.unet.load_state_dict(
load_file(
'./models/unet/diffusion_pytorch_model.safetensors', device='cuda'),
strict=False)
pipe.text_encoder.load_state_dict(
torch.load('./models/text_encoder/pytorch_model.bin'), strict=False)

from safetensors.torch import load_model
load_model(pipe.unet, "./models/unet/diffusion_pytorch_model.safetensors")
pipe.text_encoder.load_state_dict(torch.load("./models/text_encoder/pytorch_model.bin"), strict=False)
pipe = pipe.to('cuda')


depth_estimator = DPTForDepthEstimation.from_pretrained(
'Intel/dpt-hybrid-midas').to('cuda')
feature_extractor = DPTFeatureExtractor.from_pretrained(
Expand All @@ -51,7 +48,7 @@

global current_control
current_control = 'canny'
controlnet_conditioning_scale = 0.5
# controlnet_conditioning_scale = 0.8


def set_seed(seed):
Expand Down Expand Up @@ -94,8 +91,8 @@ def add_task(prompt, negative_prompt, control_type):
elif control_type == 'shape-guided':
promptA = prompt + ' P_shape'
promptB = prompt + ' P_ctxt'
negative_promptA = negative_prompt + ' P_shape'
negative_promptB = negative_prompt + ' P_ctxt'
negative_promptA = negative_prompt
negative_promptB = negative_prompt
elif control_type == 'image-outpainting':
promptA = prompt + ' P_ctxt'
promptB = prompt + ' P_ctxt'
Expand All @@ -104,18 +101,18 @@ def add_task(prompt, negative_prompt, control_type):
else:
promptA = prompt + ' P_obj'
promptB = prompt + ' P_obj'
negative_promptA = negative_prompt + ' P_obj'
negative_promptB = negative_prompt + ' P_obj'
negative_promptA = negative_prompt
negative_promptB = negative_prompt

return promptA, promptB, negative_promptA, negative_promptB



def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
negative_prompt, task, vertical_expansion_ratio,
horizontal_expansion_ratio):
negative_prompt, task,vertical_expansion_ratio,horizontal_expansion_ratio):
size1, size2 = input_image['image'].convert('RGB').size

if task != 'image-outpainting':
if task!='image-outpainting':
if size1 < size2:
input_image['image'] = input_image['image'].convert('RGB').resize(
(640, int(size2 / size1 * 640)))
Expand All @@ -130,44 +127,34 @@ def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
input_image['image'] = input_image['image'].convert('RGB').resize(
(int(size1 / size2 * 512), 512))

if (vertical_expansion_ratio is not None) and (horizontal_expansion_ratio
is not None): # noqa
o_W, o_H = input_image['image'].convert('RGB').size
c_W = int(horizontal_expansion_ratio * o_W)
c_H = int(vertical_expansion_ratio * o_H)
if vertical_expansion_ratio!=None and horizontal_expansion_ratio!=None:
o_W,o_H = input_image['image'].convert('RGB').size
c_W = int(horizontal_expansion_ratio*o_W)
c_H = int(vertical_expansion_ratio*o_H)

expand_img = np.ones((c_H, c_W, 3), dtype=np.uint8) * 127
expand_img = np.ones((c_H, c_W,3), dtype=np.uint8)*127
original_img = np.array(input_image['image'])
expand_img[int((c_H - o_H) / 2.0):int((c_H - o_H) / 2.0) + o_H,
int((c_W - o_W) / 2.0):int((c_W - o_W) / 2.0) +
o_W, :] = original_img
expand_img[int((c_H-o_H)/2.0):int((c_H-o_H)/2.0)+o_H,int((c_W-o_W)/2.0):int((c_W-o_W)/2.0)+o_W,:] = original_img

blurry_gap = 10

expand_mask = np.ones((c_H, c_W, 3), dtype=np.uint8) * 255
if vertical_expansion_ratio == 1 and horizontal_expansion_ratio != 1:
expand_mask[int((c_H - o_H) / 2.0):int((c_H - o_H) / 2.0) + o_H,
int((c_W - o_W) / 2.0) +
blurry_gap:int((c_W - o_W) / 2.0) + o_W -
blurry_gap, :] = 0
elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio != 1:
expand_mask[int((c_H - o_H) / 2.0) +
blurry_gap:int((c_H - o_H) / 2.0) + o_H - blurry_gap,
int((c_W - o_W) / 2.0) +
blurry_gap:int((c_W - o_W) / 2.0) + o_W -
blurry_gap, :] = 0
elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio == 1:
expand_mask[int((c_H - o_H) / 2.0) +
blurry_gap:int((c_H - o_H) / 2.0) + o_H - blurry_gap,
int((c_W - o_W) /
2.0):int((c_W - o_W) / 2.0) + o_W, :] = 0
expand_mask = np.ones((c_H, c_W,3), dtype=np.uint8)*255
if vertical_expansion_ratio == 1 and horizontal_expansion_ratio!=1:
expand_mask[int((c_H-o_H)/2.0):int((c_H-o_H)/2.0)+o_H,int((c_W-o_W)/2.0)+blurry_gap:int((c_W-o_W)/2.0)+o_W-blurry_gap,:] = 0 #noqa
elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio!=1:
expand_mask[int((c_H-o_H)/2.0)+blurry_gap:int((c_H-o_H)/2.0)+o_H-blurry_gap,int((c_W-o_W)/2.0)+blurry_gap:int((c_W-o_W)/2.0)+o_W-blurry_gap,:] = 0 #noqa
elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio==1:
expand_mask[int((c_H-o_H)/2.0)+blurry_gap:int((c_H-o_H)/2.0)+o_H-blurry_gap,int((c_W-o_W)/2.0):int((c_W-o_W)/2.0)+o_W,:] = 0 #noqa

input_image['image'] = Image.fromarray(expand_img)
input_image['mask'] = Image.fromarray(expand_mask)




promptA, promptB, negative_promptA, negative_promptB = add_task(
prompt, negative_prompt, task)
# print(promptA, promptB, negative_promptA, negative_promptB)
print(promptA, promptB, negative_promptA, negative_promptB)
img = np.array(input_image['image'].convert('RGB'))

W = int(np.shape(img)[0] - np.shape(img)[0] % 8)
Expand All @@ -191,8 +178,8 @@ def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
num_inference_steps=ddim_steps).images[0]
mask_np = np.array(input_image['mask'].convert('RGB'))
red = np.array(result).astype('float') * 1
red[:, :, 0] = 0
red[:, :, 2] = 180.0
red[:, :, 0] = 180.0
red[:, :, 2] = 0
red[:, :, 1] = 0
result_m = np.array(result)
result_m = Image.fromarray(
Expand All @@ -208,15 +195,17 @@ def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,

dict_res = [input_image['mask'].convert('RGB'), result_m]

return result_paste, dict_res
dict_out = [input_image['image'].convert('RGB'), result_paste]

return dict_out, dict_res


def predict_controlnet(input_image, input_control_image, control_type, prompt,
ddim_steps, scale, seed, negative_prompt):
ddim_steps, scale, seed, negative_prompt,controlnet_conditioning_scale):
promptA = prompt + ' P_obj'
promptB = prompt + ' P_obj'
negative_promptA = negative_prompt + ' P_obj'
negative_promptB = negative_prompt + ' P_obj'
negative_promptA = negative_prompt
negative_promptB = negative_prompt
size1, size2 = input_image['image'].convert('RGB').size

if size1 < size2:
Expand Down Expand Up @@ -289,6 +278,7 @@ def predict_controlnet(input_image, input_control_image, control_type, prompt,
width=H,
height=W,
guidance_scale=scale,
controlnet_conditioning_scale = controlnet_conditioning_scale,
num_inference_steps=ddim_steps).images[0]
red = np.array(result).astype('float') * 1
red[:, :, 0] = 180.0
Expand All @@ -307,29 +297,29 @@ def predict_controlnet(input_image, input_control_image, control_type, prompt,
ours_np = np.asarray(result) / 255.0
ours_np = ours_np * m_img + (1 - m_img) * img_np
result_paste = Image.fromarray(np.uint8(ours_np * 255))
return result_paste, [controlnet_image, result_m]
return [input_image['image'].convert('RGB'), result_paste], [controlnet_image, result_m]


def infer(input_image, text_guided_prompt, text_guided_negative_prompt,
shape_guided_prompt, shape_guided_negative_prompt, fitting_degree,
ddim_steps, scale, seed, task, enable_control, input_control_image,
control_type, vertical_expansion_ratio, horizontal_expansion_ratio,
outpaint_prompt, outpaint_negative_prompt):
control_type,vertical_expansion_ratio,horizontal_expansion_ratio,outpaint_prompt,
outpaint_negative_prompt,controlnet_conditioning_scale,removal_prompt,
removal_negative_prompt):
if task == 'text-guided':
prompt = text_guided_prompt
negative_prompt = text_guided_negative_prompt
elif task == 'shape-guided':
prompt = shape_guided_prompt
negative_prompt = shape_guided_negative_prompt
elif task == 'object-removal':
prompt = ''
negative_prompt = ''
prompt = removal_prompt
negative_prompt = removal_negative_prompt
elif task == 'image-outpainting':
prompt = outpaint_prompt
negative_prompt = outpaint_negative_prompt
return predict(input_image, prompt, fitting_degree, ddim_steps, scale,
seed, negative_prompt, task, vertical_expansion_ratio,
horizontal_expansion_ratio)
seed, negative_prompt, task,vertical_expansion_ratio,horizontal_expansion_ratio)
else:
task = 'text-guided'
prompt = text_guided_prompt
Expand All @@ -338,10 +328,10 @@ def infer(input_image, text_guided_prompt, text_guided_negative_prompt,
if enable_control and task == 'text-guided':
return predict_controlnet(input_image, input_control_image,
control_type, prompt, ddim_steps, scale,
seed, negative_prompt)
seed, negative_prompt,controlnet_conditioning_scale)
else:
return predict(input_image, prompt, fitting_degree, ddim_steps, scale,
seed, negative_prompt, task, None, None)
seed, negative_prompt, task,None,None)


def select_tab_text_guided():
Expand All @@ -351,7 +341,6 @@ def select_tab_text_guided():
def select_tab_object_removal():
    """Return the task identifier for the object-removal tab.

    Used as the ``fn`` of ``tab_object_removal.select`` with ``task`` as
    the output component, so the returned string becomes the value of the
    hidden ``task`` radio that ``infer`` branches on.

    Returns:
        str: The literal ``'object-removal'``.
    """
    return 'object-removal'


def select_tab_image_outpainting():
    """Return the task identifier for the image-outpainting tab.

    The returned string matches the ``'image-outpainting'`` choice of the
    hidden ``task`` radio and the corresponding branch in ``infer``.

    NOTE(review): presumably registered as the ``select`` callback of the
    image-outpainting tab, mirroring ``select_tab_object_removal``; the
    wiring is not visible in this view, so confirm.

    Returns:
        str: The literal ``'image-outpainting'``.
    """
    return 'image-outpainting'

Expand All @@ -371,16 +360,16 @@ def select_tab_shape_guided():
"<a href='https://arxiv.org/abs/2312.03594/'>Paper</a> &ensp;"
"<a href='https://github.com/open-mmlab/mmagic/tree/main/projects/powerpaint'>Code</a> </font></div>" # noqa
)

with gr.Row():
gr.Markdown(
"**Note:** Due to network-related factors, the page may experience occasional bugs! If the inpainting results deviate significantly from expectations, consider toggling between task options to refresh the content." # noqa
)
with gr.Row():
with gr.Column():
gr.Markdown('### Input image and draw mask')
input_image = gr.Image(source='upload', tool='sketch', type='pil')

task = gr.Radio([
'text-guided', 'object-removal', 'shape-guided',
'image-outpainting'
],
task = gr.Radio(['text-guided', 'object-removal', 'shape-guided', 'image-outpainting'],
show_label=False,
visible=False)

Expand All @@ -397,6 +386,13 @@ def select_tab_shape_guided():
enable_control = gr.Checkbox(
label='Enable controlnet',
info='Enable this if you want to use controlnet')
controlnet_conditioning_scale = gr.Slider(
label='controlnet conditioning scale',
minimum=0,
maximum=1,
step=0.05,
value=0.5,
)
control_type = gr.Radio(['canny', 'pose', 'depth', 'hed'],
label='Control type')
input_control_image = gr.Image(source='upload', type='pil')
Expand All @@ -408,7 +404,13 @@ def select_tab_shape_guided():
enable_object_removal = gr.Checkbox(
label='Enable object removal inpainting',
value=True,
info='The recommended configuration for the Guidance Scale is 10 or higher. \
If undesired objects appear in the masked area, \
you can address this by specifically increasing the Guidance Scale.',
interactive=False)
removal_prompt = gr.Textbox(label='Prompt')
removal_negative_prompt = gr.Textbox(
label='negative_prompt')
tab_object_removal.select(
fn=select_tab_object_removal, inputs=None, outputs=task)

Expand All @@ -417,6 +419,9 @@ def select_tab_shape_guided():
enable_object_removal = gr.Checkbox(
label='Enable image outpainting',
value=True,
info='The recommended configuration for the Guidance Scale is 10 or higher. \
If unwanted random objects appear in the extended image region, \
you can enhance the cleanliness of the extension area by increasing the Guidance Scale.',
interactive=False)
outpaint_prompt = gr.Textbox(label='Outpainting_prompt')
outpaint_negative_prompt = gr.Textbox(
Expand Down Expand Up @@ -463,10 +468,7 @@ def select_tab_shape_guided():
label='Steps', minimum=1, maximum=50, value=45, step=1)
scale = gr.Slider(
label='Guidance Scale',
info='For object removal, \
it is recommended to set the value at 10 or above, \
while for image outpainting, \
it is advisable to set it at 18 or above.',
info='For object removal and image outpainting, it is recommended to set the value at 10 or above.', #noqa
minimum=0.1,
maximum=30.0,
value=7.5,
Expand All @@ -480,19 +482,21 @@ def select_tab_shape_guided():
)
with gr.Column():
gr.Markdown('### Inpainting result')
inpaint_result = gr.Image()
inpaint_result = gr.Gallery(
label='Generated images', show_label=False, columns=2)
gr.Markdown('### Mask')
gallery = gr.Gallery(
label='Generated images', show_label=False, columns=2)
label='Generated masks', show_label=False, columns=2)

run_button.click(
fn=infer,
inputs=[
input_image, text_guided_prompt, text_guided_negative_prompt,
shape_guided_prompt, shape_guided_negative_prompt, fitting_degree,
ddim_steps, scale, seed, task, enable_control, input_control_image,
control_type, vertical_expansion_ratio, horizontal_expansion_ratio,
outpaint_prompt, outpaint_negative_prompt
control_type,vertical_expansion_ratio,horizontal_expansion_ratio,
outpaint_prompt,outpaint_negative_prompt,controlnet_conditioning_scale,
removal_prompt,removal_negative_prompt
],
outputs=[inpaint_result, gallery])

Expand Down

0 comments on commit b08b6ca

Please sign in to comment.