-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcmp_perf.py
256 lines (232 loc) · 11.3 KB
/
cmp_perf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
import sys
## Raw perf related data index
idx_summary_token = 0
idx_perf_by_type_token = 1
idx_perf_by_node_token = 2
## Sub-index in summray
idx_total_in_summary = 0
idx_avg_in_isummary = 1
### find the perf related data by token and pack raw data into list.
### return type is a list of list.
### return the[stream0_perf, stream1_perf, .....].
### each streamX_perf would be [graph perf, perf_by_types, perf_by_nodes],
def pack_raw_perf_data(in_file, token_list):
if len(token_list) < 2:
return[]
multi_streams_perf = []
stream_perf = []
token_perf = []
token_idx = 0
stream_start = False
token_start = False
for line in in_file:
## find the stream start token
if line.startswith('======= ENABLE_DEBUG_CAPS:OV_CPU_SUMMARY_PERF ======'):
if token_start:
stream_perf.append(token_perf)
if stream_start:
multi_streams_perf.append(stream_perf)
token_idx = 0
stream_perf = []
stream_start = True
token_start = False
elif stream_start == False:
continue
##streams start = true case.
if line.startswith(token_list[token_idx]):
if token_start:
##Append last token recorded
stream_perf.append(token_perf)
token_perf = []
token_start = True
if (token_idx != len(token_list) - 1):
token_idx += 1
elif token_start:
if line != '\n':
token_perf.append(line.strip())
##Append the all the remaining data if having.
if token_start:
stream_perf.append(token_perf)
if stream_start:
multi_streams_perf.append(stream_perf)
return multi_streams_perf
### convert the perf by nodes data into dictionary.
### return nodes perf dic.
### key: node_name
### value: list [perf_counter, precentage, implement_type]
def nodes_perf_in_dic(nodes_data):
dic = {}
for line in nodes_data:
# if (len(line.split()) > 6):
# print('!!!!!!len = {1}, expected 6, skip parsing perf "{0}"'.format(line, str(len(line.split()))))
# # continue
split_perf = line.split()
perc = split_perf[0]
perf = split_perf[2]
imp_type = split_perf[-1]
if (len(split_perf) > 6):
node_name = "".join(split_perf[4:-1])
else:
node_name = split_perf[4]
#perc, x, perf, y, *node_name, imp_type = line.split()
counter,*x = perf.split('(')
dic[node_name] = [float(counter), perc, imp_type]
return dic
### get all the streams perf by node.
### return a list of [stream0_data, stream2_data,....]
### streamXdata: tuple of (nodes perf dic , average_time).
### nodes perf: return of nodes_perf_in_dic()
def get_perf(perf_file_path):
token_list = ['Summary of ', ' perf_by_type:', ' perf_by_node:']
with open(perf_file_path) as infile:
raw_perf_streams = pack_raw_perf_data(infile, token_list)
streams_perf = []
for raw_perf_per_stream in raw_perf_streams:
dic = nodes_perf_in_dic(raw_perf_per_stream[idx_perf_by_node_token])
*a, avg_perf = raw_perf_per_stream[idx_summary_token][idx_avg_in_isummary].split()
streams_perf.append((dic, float(avg_perf)))
return streams_perf
### idx in the [perf_counter, precentage, implement_type]
idx_perf_in_list = 0
idx_perc_in_list = 1
idx_impl_type_in_list = 2
stream_mark = '####################################################################################################################################################################'
data_mark = '-----------------------------------------------------------------------------------------------------------------------------------------------------------------'
### compare 2 perf file with 'OV_CPU_SUMMARY_PERF = 1'
### return a dic of regresssed node [stream_idx:topN_regressed_list]
def compare_perf(ref_path, target_path):
targ_streams_perf = get_perf(target_path)
ref_streams_perf = get_perf(ref_path)
if len(targ_streams_perf) != len(ref_streams_perf):
print('## target streams number is {0} , not equal with refrence streams number {1}. \n target file: {3}, ref file {2}:'. format(len(targ_streams_perf), len(ref_streams_perf), ref_path, target_path))
return dict()
topN_regression_dic = {}
# print(len(targ_streams_perf))
for stream_idx in range(0, len(targ_streams_perf)):
target_perf, target_avg = targ_streams_perf[stream_idx]
ref_perf, ref_avg = ref_streams_perf[stream_idx]
### changed_dic: nodes both in ref and target
### new_dic: in target not in ref
### missing_dic: in ref not in target
changed_dic = {}
new_dic = {}
missing_dic = {}
for x in target_perf.keys():
targer_cnt = (target_perf[x])[idx_perf_in_list]
if x in ref_perf:
ref_cnt = (ref_perf[x])[idx_perf_in_list]
changed_dic[x] = targer_cnt - ref_cnt
else:
new_dic[x] = targer_cnt
for x in ref_perf.keys():
if not x in target_perf:
ref_cnt = (ref_perf[x])[idx_perf_in_list]
missing_dic[x] = ref_cnt
sorted_regressed_nodes = {k:v for k,v in changed_dic.items() if v > 1.0}
sorted_gain_nodes = {k:v for k,v in changed_dic.items() if v < -1.0}
sorted_regressed_nodes = dict(sorted(sorted_regressed_nodes.items(), key=lambda item: item[1], reverse=True))
##gain counter are negative floating. No reverse mean top gain in the first.
sorted_gain_nodes = dict(sorted(sorted_gain_nodes.items(), key=lambda item: item[1]))
sorted_new = dict(sorted(new_dic.items(), key=lambda item: item[1], reverse=True))
sorted_missing = dict(sorted(missing_dic.items(), key=lambda item: item[1], reverse=True))
total_regression_perc = (target_avg - ref_avg) * 100.0 / ref_avg
nodes_regression_perc = sum(sorted_regressed_nodes.values()) * 100 / ref_avg
nodes_gain_perc = sum(sorted_gain_nodes.values()) * 100 / ref_avg
new_regression_perc = sum(new_dic.values()) * 100 / ref_avg
missing_prec = sum(missing_dic.values()) * 100 / ref_avg
# print(sorted_nodes)
topN = 10
topN_nodes = []
node_num = 0
print(stream_mark)
print('@@@STREAM ID: {0} , Total regression: [{6:.0f}us -> {7:.0f}us, {1:.3f}% ], Break down into 4 cases: ({2:.3f}%) + ({3:.3f}%) + ({4:.3f}%) + (-{8:.3f}%) = {5:.3f}% (regress - gain + new - missing) '.format(stream_idx,
total_regression_perc, nodes_regression_perc,
nodes_gain_perc, new_regression_perc, nodes_regression_perc+nodes_gain_perc+new_regression_perc-missing_prec,
ref_avg, target_avg, missing_prec))
print(stream_mark)
### print the regressed node.
print('TOP {0} REGRESSED NODE LIST:'.format(topN))
impl_dic = dict();
for name, cnt in sorted_regressed_nodes.items():
t_type = (target_perf[name])[idx_impl_type_in_list]
r_type = (ref_perf[name])[idx_impl_type_in_list]
# The regressed count contribution to whole avg cnt
perc_in_avg = cnt * 100.0 / ref_avg
impl_key= f'{r_type}{"-->"}{t_type}'
if impl_key in impl_dic.keys():
impl_dic[impl_key] += perc_in_avg
else:
impl_dic[impl_key] = perc_in_avg
if node_num < topN:
t_cnt = (target_perf[name])[idx_perf_in_list]
r_cnt = (ref_perf[name])[idx_perf_in_list]
perc_with_ref = cnt * 100.0 / r_cnt
print('node_diff/ref_total_latency: {0:<6.2f} % , {1:>12.0f}us @{4:<35} -> {2:>12.0f}us@{5:<35} : {3:<6.2f} % , {6:<20} '.format(perc_in_avg, r_cnt, t_cnt, perc_with_ref, r_type, t_type, name))
node_num += 1
topN_nodes.append(name)
impl_dic = dict(sorted(impl_dic.items(), key=lambda x: x[1], reverse=True))
print('*****************************')
print('All Regressed by type summary:')
for key, val in impl_dic.items():
print('{0:<100} : {1:<.2f}%'.format(key, val))
topN_regression_dic[stream_idx] = topN_nodes
### print the gained node information
print(data_mark)
print('TOP {0} PERF GAIN NODE LIST:'.format(topN))
impl_dic.clear()
node_num = 0
for name, cnt in sorted_gain_nodes.items():
t_type = (target_perf[name])[idx_impl_type_in_list]
r_type = (ref_perf[name])[idx_impl_type_in_list]
# The regressed count contribution to whole avg cnt
perc_in_avg = cnt * 100.0 / ref_avg
impl_key= f'{r_type}{"-->"}{t_type}'
if impl_key in impl_dic.keys():
impl_dic[impl_key] += perc_in_avg
else:
impl_dic[impl_key] = perc_in_avg
if node_num < topN:
t_cnt = (target_perf[name])[idx_perf_in_list]
r_cnt = (ref_perf[name])[idx_perf_in_list]
perc_in_avg = cnt * 100.0 / ref_avg
perc_with_ref = cnt * 100.0 / r_cnt
print('node_diff/ref_total_latency: {0:<6.2f} % , {1:>12.0f}us @{4:<35} -> {2:>12.0f}us@{5:<35} : {3:<6.2f} % , {6:<20} '.format(perc_in_avg, r_cnt, t_cnt, perc_with_ref, r_type, t_type, name))
node_num += 1
impl_dic = dict(sorted(impl_dic.items(), key=lambda x: x[1]))
print('*****************************')
print('All gain by type summary:')
for key, val in impl_dic.items():
print('{0:<100} : {1:<.2f}%'.format(key, val))
topN_regression_dic[stream_idx] = topN_nodes
### print the missing node information
print(data_mark)
print('TOP {0} MISSING NODES LIST WITH THRESHOLD_PC > 5 :'.format(topN))
node_num = 0
for name, cnt in sorted_missing.items():
if node_num < topN and cnt > 5 :
t_cnt = (ref_perf[name])[idx_perf_in_list]
t_type = (ref_perf[name])[idx_impl_type_in_list]
perc_in_avg = cnt * 100.0 / ref_avg
print('node_diff/ref_total_latency: {0:<6.2f} % , {1:>12.0f}us @{2:<35} , {3:<20} '.format(perc_in_avg, t_cnt, t_type, name))
node_num += 1
print(data_mark)
### print the new added node information
print(data_mark)
print('TOP {0} NEW NODES LIST WITH THRESHOLD_PC > 5 :'.format(topN))
node_num = 0
for name, cnt in sorted_new.items():
if node_num < topN and cnt > 5 :
t_cnt = (target_perf[name])[idx_perf_in_list]
t_type = (target_perf[name])[idx_impl_type_in_list]
perc_in_avg = cnt * 100.0 / ref_avg
print('node_diff/ref_total_latency: {0:<6.2f} % , {1:>12.0f}us @{2:<35} , {3:<20} '.format(perc_in_avg, t_cnt, t_type, name))
node_num += 1
print(data_mark)
print('\n')
return topN_regression_dic
## Uncomment the Direct debug with file inputs.
# ref_path = sys.argv[1]
# target_path = sys.argv[2]
# node_list = compare_perf(ref_path, target_path)
# print(node_list)
# fp_nodes = open('./nodes.txt', 'w')