forked from tbarbette/npf
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnpf_profile.py
161 lines (135 loc) · 5.52 KB
/
npf_profile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env python3
import argparse
from tqdm import tqdm
from npf import npf
from npf.regression import *
from pathlib import Path
from npf.testie import Testie
from npf.statistics import Statistics
from npf.variable import *
from variable_c import *
import cProfile
import pstats
import io
import pickle
from pstats import SortKey
def profiler(f, data, m):
    """Profile ``f`` over every element of ``data``, repeated ``m`` times.

    Each repetition runs under a fresh ``cProfile.Profile``; the raw
    ``getstats()`` entries of that run become the rows of one DataFrame and
    the per-repetition frames are concatenated.

    Args:
        f: Callable invoked as ``f(a)`` for each ``a`` in ``data``.
        data: Iterable of inputs. It is iterated once per repetition.
        m: Number of profiling repetitions.

    Returns:
        A DataFrame with the columns ``func``, ``ncalls``, ``ccalls``,
        ``tottime``, ``cumtime`` and ``callers``. The frame is empty (but
        keeps those columns) when no repetition is run.
    """
    # NOTE(review): per the cProfile documentation, getstats() entries are
    # ordered (code, callcount, reccallcount, totaltime, inlinetime, calls),
    # so the column labelled 'tottime' appears to hold the cumulative time
    # and 'cumtime' the inline time. Labels are kept unchanged because stored
    # results and the plotting code rely on them -- confirm the intent.
    columns = ['func', 'ncalls', 'ccalls', 'tottime', 'cumtime', 'callers']
    runs = []
    for _ in range(m):
        with cProfile.Profile() as pr:
            for a in data:
                f(a)
        pr.create_stats()
        runs.append(pd.DataFrame(pr.getstats(), columns=columns))
    if not runs:
        # pd.concat() raises ValueError on an empty list; return an empty
        # frame with the expected columns instead.
        return pd.DataFrame(columns=columns)
    return pd.concat(runs)
def generate_datasets(n, force=False):
    """Return the benchmark datasets, building and caching them when needed.

    The datasets are cached in ``datasets.pickle`` in the current working
    directory. The cache is reused only when ``force`` is false and the cached
    "Integers" dataset has exactly ``n`` elements; otherwise everything is
    regenerated and the cache file is rewritten.

    Args:
        n: Number of elements generated for each base dataset.
        force: When true, ignore any existing cache file and regenerate.

    Returns:
        A dict mapping a dataset name to a shuffled list of values. The extra
        "Mix" entry is sampled (with replacement) from all other datasets.
    """
    cache_path = "datasets.pickle"
    if os.path.exists(cache_path) and not force:
        print("Load datasets from file")
        # The cache is a local file written by this function; pickle must not
        # be used here on files coming from an untrusted source.
        with open(cache_path, "rb") as f:
            datasets = pickle.load(f)
        # A cache built for another size would silently mislabel the results,
        # so it is only reused when its size matches the request.
        if len(datasets.get("Integers", ())) == n:
            return datasets
        print("Cached datasets do not match the requested size, regenerating")

    datasets = {
        "Integers": [i*17 for i in range(0,n)],
        # range(n, 0) is empty for positive n; count down explicitly so this
        # dataset really holds n negative values.
        "Negative Integers" : [-i*17 for i in range(n,0,-1)],
        "Mixed Integers" : [-i*17 for i in range(int(-n/2),int(n/2))],
        "Floats" : [(i*193)/0.17 for i in range(0,n)],
        "Int as str" : [str(i*17) for i in range(0,n)],
        "Float as str" : [str((i*193)/0.17) for i in range(0,n)],
        "Strings (beginning)": ['a'+str(i*17) for i in range(0,n)],
        "Strings (end)": [str(i*17)+'a' for i in range(0,n)],
        "Strings (middle)": ['bbbb'+str(i*17)+'aaaa' for i in range(0,n)],
        "Strings (middle2)": [str(i*17)+'aaaa'+"1234" for i in range(0,n)],
        "String with floats": ['bbbb'+str((193*i)/1.7)+'aaaa' for i in range(0,n)],
        "Bad floats" : ['11111.111'+str((193*i)/1.7)+'112.1' for i in range(0,n)],
        "Zeros": [0 for i in range(0,n)],
        "Zeros and ones" : [i%2 for i in range(0,n)],
    }
    for values in datasets.values():
        random.shuffle(values)

    # Build the mixed dataset from an equal share of every base dataset.
    mix = []
    share = int(n / len(datasets))
    for values in datasets.values():
        # Never request more samples than the dataset holds, which also keeps
        # random.choices() away from an empty population with k > 0.
        mix += random.choices(values, k=min(share, len(values)))
    random.shuffle(mix)
    datasets["Mix"] = mix

    print("Saving datasets for future runs")
    with open(cache_path, "wb") as f:
        pickle.dump(datasets, f)
    return datasets
def load_results(force=False):
    """Return previously stored profiling results, or an empty DataFrame.

    Args:
        force: When true, any existing ``profiling.pickle`` is ignored.

    Returns:
        The object unpickled from ``profiling.pickle`` when that file exists
        and ``force`` is false; otherwise a new empty DataFrame.
    """
    reuse_stored = not force and os.path.exists("profiling.pickle")
    if not reuse_stored:
        return pd.DataFrame([])
    print("Load previous results from file")
    with open("profiling.pickle", "rb") as handle:
        return pickle.load(handle)
def save_results(df):
    """Persist the result columns of ``df`` to ``profiling.pickle``.

    Nothing is written when ``df`` has no rows.

    Args:
        df: DataFrame holding at least the columns ``dataset``, ``function``,
            ``cumtime``, ``ncalls``, ``iterations`` and ``data_size``.

    Raises:
        KeyError: If one of the required columns is missing. The existing
            file is left untouched in that case.
    """
    if len(df) == 0:
        return
    # Select the columns before opening the file: opening in "wb" mode
    # truncates it, so a failing selection would otherwise destroy the
    # previously stored results.
    df_pickle = df[["dataset", "function", "cumtime", "ncalls", "iterations", "data_size"]]
    with open("profiling.pickle", "wb") as f:
        pickle.dump(df_pickle, f)
def plot(df, variants, iterations, data_size):
    """Plot the mean ``cumtime`` per dataset for each variant.

    Only the rows of ``df`` whose ``iterations`` and ``data_size`` equal the
    given values are used. The selected rows are printed, one line per
    variant is drawn, and the figure is saved as ``profiling.pdf``.

    Args:
        df: DataFrame with ``dataset``, ``function``, ``cumtime``,
            ``data_size`` and ``iterations`` columns.
        variants: Function names compared against the ``function`` column.
        iterations: Iteration count used to select rows.
        data_size: Dataset size used to select rows.
    """
    keep = (df.iterations == iterations) & (df.data_size == data_size)
    selected = df[keep]
    print(selected[["dataset", "function", "cumtime", "data_size", "iterations"]])

    for variant in variants:
        by_dataset = selected[selected["function"] == variant].groupby("dataset")
        labels = by_dataset["dataset"].groups.keys()
        means = by_dataset["cumtime"].mean()
        # The standard deviation is computed but replaced by zeros before
        # plotting, so no error bars are visible in the figure.
        spread = by_dataset["cumtime"].std()
        flat_err = [0] * len(spread)
        plt.errorbar(
            labels,
            means,
            yerr=flat_err,
            label=variant,
            marker=".",
            linestyle="-",
        )

    plt.legend()
    plt.xticks(rotation='vertical')
    plt.savefig("profiling.pdf", bbox_inches="tight")
def already(df, dataset, function, iterations, size, force):
    """Tell whether ``df`` already holds a result for this configuration.

    Args:
        df: DataFrame of stored results with ``dataset``, ``function``,
            ``iterations`` and ``data_size`` columns (or an empty frame).
        dataset: Dataset name to look for.
        function: Callable under test; matched through its ``__name__``.
        iterations: Iteration count to look for.
        size: Dataset size to look for.
        force: When true, stored results are ignored.

    Returns:
        ``True`` when at least one stored row matches all four criteria and
        ``force`` is false, otherwise ``False``.
    """
    if force or len(df) == 0:
        return False
    matches = (
        (df["dataset"] == dataset)
        & (df["function"] == function.__name__)
        & (df["iterations"] == iterations)
        & (df["data_size"] == size)
    )
    # A single matching row is an existing result; the former "> 1" check
    # missed runs that had stored exactly one row.
    return bool(matches.any())
# Command-line entry point: profile the selected component on every dataset,
# merge the new measurements with the stored ones, then plot and save them.
# The text is the program description; passed positionally it would be taken
# as the program name shown in the usage line.
parser = argparse.ArgumentParser(description="Utility program to check NPF's components performance")
parser.add_argument("--component", default="is_numeric")
parser.add_argument("--variant", default=None, required=False)
parser.add_argument("--iterations", "-n", default=1000, help="Number of iterations", type=int)
parser.add_argument("--size", "-s", default=1000, help="Datasets size", type=int)
parser.add_argument("--dataset", "-d", default=None, required=False, help="Run for a single dataset")
parser.add_argument("--force", "-f", action="store_true", help="Force re-run, overwriting current results in the database")
args = parser.parse_args()

results = load_results(args.force)
numeric_datasets = generate_datasets(args.size, args.force)

if args.component == "is_numeric":
    variants = [is_numeric, is_numeric2, is_numeric3, is_numeric4,is_numeric6, is_numeric5]
    variants_names = [v.__name__ for v in variants]
    if args.variant is not None:
        functions = [f for f in variants if f.__name__ == args.variant]
        if not functions:
            # Fail loudly instead of silently profiling nothing.
            parser.error(f"Unknown variant {args.variant!r}, choose from {variants_names}")
    else:
        functions = variants
    print("Running test on ", [f.__name__ for f in functions] )

    for name, data in tqdm(numeric_datasets.items()):
        if args.dataset and args.dataset != name:
            continue
        print("---------------------------------------------")
        # Iterate over the selected functions so that --variant is honoured;
        # looping over all variants ignored the option.
        for f in tqdm(functions):
            if already(results, name, f, args.iterations, args.size, args.force):
                print("Result already exists. Skipping")
                continue
            res = profiler(f,data, args.iterations)
            res = res.assign(dataset=name, function=f.__name__, iterations = args.iterations, data_size=args.size)
            # Keep only the profiler rows whose call count equals the highest
            # call count seen in this run, and put them ahead of older results.
            results = pd.concat([res[res.ncalls == res.ncalls.max()], results])
        print("-----------------")

    # The plot always covers every variant so stored results of variants that
    # were not re-run are still shown.
    plot(results, variants_names, args.iterations, args.size)
    save_results(results)
else:
    print("Unknown component to test!")