-
Notifications
You must be signed in to change notification settings - Fork 0
/
profile.py
47 lines (36 loc) · 1.56 KB
/
profile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import torch
from torch.profiler import profile, record_function, ProfilerActivity
from modeling.openclip_model import OpenCLIPModel as OPT_Model
from clip_server.model.openclip_model import OpenCLIPModel as ORG_Model
def profiler(name, mode, B):
    """Profile one encode pass of the original vs. optimized OpenCLIP model.

    Loads both model wrappers on GPU, warms them up, then prints a
    torch.profiler table (sorted by total CUDA time) for a single forward
    pass of each implementation.

    Args:
        name: model identifier accepted by both wrappers,
              e.g. 'ViT-B-16::laion400m_e31'.
        mode: 'text' or 'image' — selects which encoder to benchmark.
        B: batch size of the mock input.

    Raises:
        ValueError: if *mode* is neither 'text' nor 'image'.
    """
    # Load both implementations on the GPU with the same checkpoint name.
    opt_model = OPT_Model(name=name, device='cuda')
    org_model = ORG_Model(name=name, device='cuda')

    # Build the mock batch and select the matching encode functions in one
    # dispatch. (The original used two separate if-chains, shadowed the
    # builtin `input`, and had no else branch — an unknown mode produced an
    # opaque NameError instead of a clear error.)
    if mode == 'text':
        # Token ids; (B, 77) is CLIP's fixed text context length.
        batch = torch.randint(0, 10, (B, 77)).long().cuda()
        org_encode = org_model.encode_text
        opt_encode = opt_model.encode_text
    elif mode == 'image':
        # NOTE(review): pixel values drawn from randint(0, 10) then cast to
        # half — presumably just a placeholder tensor for timing; the actual
        # values don't matter for profiling.
        batch = torch.randint(0, 10, (B, 3, 224, 224)).half().cuda()
        org_encode = org_model.encode_image
        opt_encode = opt_model.encode_image
    else:
        raise ValueError(f"mode must be 'text' or 'image', got {mode!r}")

    # Warm up both models so CUDA context setup, kernel selection, and
    # caching do not pollute the profiled run.
    for _ in range(10):
        _1 = org_encode(batch)
        _2 = opt_encode(batch)

    # Profile time consumption: one forward pass per model, each in its own
    # profiler session so the tables are directly comparable.
    with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True) as prof:
        with record_function("model_inference"):
            _1 = org_encode(batch)
    print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=20))

    with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True) as prof:
        with record_function("model_inference"):
            _2 = opt_encode(batch)
    print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=20))
if __name__ == "__main__":
    # Benchmark the text encoder of ViT-B-16 (laion400m_e31) at batch size 8.
    model_name = 'ViT-B-16::laion400m_e31'
    profiler(model_name, 'text', 8)