-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathint8calibration.py
122 lines (98 loc) · 4.02 KB
/
int8calibration.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '7'
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import tensorrt as trt
import torch
import acap
import random
import pickle
class PythonEntropyCalibrator(trt.IInt8EntropyCalibrator):
    """INT8 entropy calibrator fed from a host-side batch stream.

    Every batch obtained from ``stream`` is copied into a single device
    buffer that is allocated once in the constructor and reused for each
    ``get_batch`` call.
    """

    def __init__(self, input_layers, stream):
        """Allocate the device buffer and rewind the stream.

        Args:
            input_layers: Indexable collection of input names; only
                element 0 is read (in ``get_batch``).
            stream: Batch source providing ``calibration_data``,
                ``batch_size``, ``reset()`` and ``next_batch()``.
        """
        # The base initializer is invoked explicitly on the TensorRT class.
        trt.IInt8EntropyCalibrator.__init__(self)
        self.input_layers = input_layers
        self.stream = stream
        # Device buffer sized from the stream's preallocated batch array.
        staging_bytes = self.stream.calibration_data.nbytes
        self.d_input = cuda.mem_alloc(staging_bytes)
        stream.reset()

    def get_batch_size(self):
        """Return the batch size reported by the underlying stream."""
        return self.stream.batch_size

    def get_batch(self, bindings, names='data'):
        """Upload the next batch and publish its device address.

        Args:
            bindings: Mutable sequence; slot 0 is overwritten with the
                integer address of the device buffer.
            names: Input names; only ``names[0]`` is inspected.

        Returns:
            ``bindings`` after slot 0 has been set, or ``None`` once the
            stream hands back an empty array.
        """
        host_batch = self.stream.next_batch()
        if host_batch.size == 0:
            return None

        cuda.memcpy_htod(self.d_input, host_batch)
        # NOTE(review): this checks names[0] against every *element* of
        # input_layers[0] (single characters when that is a string).  The
        # intent is unclear, so the check is kept exactly as written.
        for element in self.input_layers[0]:
            assert names[0] != element
        bindings[0] = int(self.d_input)
        return bindings

    def read_calibration_cache(self, length):
        """Report that no calibration cache is available."""
        return None

    def write_calibration_cache(self, ptr, size):
        """Ignore the calibration cache; nothing is written."""
        # Persisting the cache is disabled.  Earlier sketch, kept for
        # reference:
        # cache = ctypes.c_char_p(int(ptr))
        # with open('calibration_cache.bin', 'wb') as f:
        #     f.write(cache.value)
        return None
class ImageBatchStream():
    """Hand out fixed-size float32 batches of decoded calibration images.

    A single ``calibration_data`` array is reused for every batch, so the
    array returned by ``next_batch`` is overwritten by the following call.
    """

    def __init__(self, batch_size, calibration_files):
        """Set up the batch bookkeeping and the reusable buffers.

        Args:
            batch_size: Number of images per batch.
            calibration_files: Sliceable sequence of image paths.
        """
        whole_batches, leftover = divmod(len(calibration_files), batch_size)

        self.batch_size = batch_size
        # One extra batch when the file count is not a multiple of batch_size.
        self.max_batches = whole_batches + (1 if leftover else 0)
        self.final_shape = [448, 448]
        self.files = calibration_files
        batch_shape = (batch_size, 3, self.final_shape[1], self.final_shape[0])
        self.calibration_data = np.zeros(batch_shape, dtype=np.float32)
        self.batch = 0
        self.cap = acap.acap()
        # Buffers whose raw addresses are handed to the decoder.
        frame_shape = (3, self.final_shape[0], self.final_shape[1])
        self.frame = np.zeros(frame_shape, dtype=np.float32)
        self.buf = np.zeros((3, 2000, 2000), dtype=np.int8)

    def read_image_chw(self, path):
        """Decode ``path`` and return a copy of the frame buffer.

        A value drawn at random from 0..3 is forwarded to the decoder.
        """
        picked = random.sample([0, 1, 2, 3], 1)
        num = picked[0]
        # Presumably the decoder fills self.frame through the raw address,
        # using self.buf as scratch space -- TODO confirm against acap.
        self.cap.decode(
            path,
            3,
            num,
            self.final_shape[1],
            1,
            self.frame.ctypes._data,
            self.buf.ctypes._data,
        )
        return self.frame.copy()

    def reset(self):
        """Rewind the stream to its first batch."""
        self.batch = 0

    def next_batch(self):
        """Return the next batch, or an empty array when exhausted.

        Only the leading rows (one per file in this batch) are rewritten;
        any remaining rows keep whatever they held before the call.
        """
        if self.batch >= self.max_batches:
            return np.array([])

        start = self.batch_size * self.batch
        stop = self.batch_size * (self.batch + 1)

        decoded = []
        for path in self.files[start:stop]:
            print("[ImageBatchStream] Processing ", path)
            decoded.append(self.read_image_chw(path))

        # Copy into the shared batch array only after every decode succeeded.
        for slot, image in enumerate(decoded):
            self.calibration_data[slot] = image

        self.batch += 1
        return np.ascontiguousarray(self.calibration_data, dtype=np.float32)
def main():
    """Build an INT8-calibrated TensorRT engine from an ONNX model.

    Loads the pickled file list, keeps 128 shuffled entries as calibration
    images, parses the ONNX model, builds the engine with the entropy
    calibrator attached and writes the serialized engine under ``engine/``.

    Raises:
        RuntimeError: If the ONNX model fails to parse or the builder does
            not return an engine.
    """
    root = '/home/share/aichallenge/'
    model_name = 'resnet-101-aic-448-9000'
    model_path = 'onnx/' + model_name + '.onnx'
    engine_path = "engine/" + model_name + '-b32-f128-nettrt' + '.engine'

    # NOTE: unpickling can execute arbitrary code -- only load this file
    # from a trusted location.
    with open(os.path.join(root, 'MyvalEncode30OneLabel.pickle'), 'rb') as f:
        calibration_files = pickle.load(f)

    np.random.shuffle(calibration_files)
    # Each entry's first element is a path recorded under another prefix;
    # rewrite it to the shared data directory.
    calibration_files = [
        item[0].replace('/home/kcheng/AItemp/data/', '/home/share/aichallenge/')
        for item in calibration_files[:128]
    ]

    batchstream = ImageBatchStream(32, calibration_files)
    int8_calibrator = PythonEntropyCalibrator(["data"], batchstream)

    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    builder = trt.Builder(TRT_LOGGER)
    builder.int8_mode = True
    builder.int8_calibrator = int8_calibrator
    # NOTE(review): calibration batches hold 32 images while the engine's
    # max batch size is 4 -- confirm this mismatch is intended.
    builder.max_batch_size = 4
    builder.max_workspace_size = 10000000000

    network = builder.create_network()
    parser = trt.OnnxParser(network, TRT_LOGGER)
    with open(model_path, 'rb') as model:
        parsed = parser.parse(model.read())
    if not parsed:
        # Surface parser diagnostics instead of building from a broken network.
        problems = [
            parser.get_error(index).desc()
            for index in range(parser.num_errors)
        ]
        raise RuntimeError(
            "Failed to parse ONNX model {}: {}".format(
                model_path, "; ".join(problems) or "no parser errors reported"
            )
        )

    # Calibration and optimization happen during the build.
    engine = builder.build_cuda_engine(network)
    if engine is None:
        raise RuntimeError(
            "TensorRT failed to build an engine for {}".format(model_path)
        )

    with open(engine_path, "wb") as f:
        f.write(engine.serialize())


# Run the engine build only when this file is executed as a script.
if __name__ == '__main__':
    main()