random_search.py
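"""Random hyperparameter search for graph anomaly detection baselines.

For every (model, dataset) pair, the script samples up to --trials hyperparameter
configurations with sample_param, trains a detector for each, and records the test
AUROC/AUPRC/RecK of the configuration with the best validation score (AUPRC by
default), together with the average time per trial. Results are accumulated in a
pandas DataFrame and written out via save_results from utils.
"""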
import argparse
import time
import pandas
from copy import deepcopy
from utils import *  # expected to provide os, random, np, torch, Dataset, sample_param, model_detector_dict, save_results
import warnings
warnings.filterwarnings("ignore")

# Candidate seeds, one per trial.
seed_list = list(range(3407, 10000, 10))


def set_seed(seed=3407):
    # Fix all sources of randomness for reproducibility.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True

parser = argparse.ArgumentParser()
parser.add_argument('--trials', type=int, default=100)
parser.add_argument('--semi_supervised', type=int, default=0)
parser.add_argument('--models', type=str, default=None)
parser.add_argument('--datasets', type=str, default=None)
parser.add_argument('--device', type=str, default='cuda')
args = parser.parse_args()
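# Example invocations (illustrative; values for --models must be keys of
# model_detector_dict from utils, so 'GCN' and 'GIN' below are assumptions):
#   python random_search.py --trials 50 --datasets 0,2,3
#   python random_search.py --models GCN-GIN --datasets 0-4 --device cuda
# --datasets takes either comma-separated indices or an inclusive 'start-end'
# range into the datasets list below; --models takes detector names joined by '-'.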
columns = ['name']
new_row = {}
datasets = ['reddit', 'weibo', 'amazon', 'yelp', 'tfinance',
            'elliptic', 'tolokers', 'questions', 'dgraphfin', 'tsocial']
models = model_detector_dict.keys()

# Optionally restrict the evaluated datasets/models via the CLI.
if args.datasets is not None:
    if '-' in args.datasets:
        st, ed = args.datasets.split('-')
        datasets = datasets[int(st):int(ed)+1]
    else:
        datasets = [datasets[int(t)] for t in args.datasets.split(',')]
    print('Evaluated Datasets: ', datasets)

if args.models is not None:
    models = args.models.split('-')
    print('Evaluated Baselines: ', models)

# One column per (dataset, metric) pair, plus the model name.
for dataset_name in datasets:
    for metric in ['AUROC', 'AUPRC', 'RecK', 'Time']:
        columns.append(dataset_name+'-'+metric)
results = pandas.DataFrame(columns=columns)
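# The finished table has one row per model and, for each evaluated dataset,
# columns like 'reddit-AUROC', 'reddit-AUPRC', 'reddit-RecK', 'reddit-Time'.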
best_model_configs = {}
file_id = None

# Random search over hyperparameter configurations for every (model, dataset) pair.
for model in models:
    model_result = {'name': model}
    best_model_configs[model] = {}
    for dataset_name in datasets:
        print('============Dataset {} Model {}=============='.format(dataset_name, model))
        auc_list, pre_list, rec_list = [], [], []
        set_seed()
        time_cost = 0
        best_val_score = 0
        train_config = {
            'device': args.device,
            'epochs': 100,
            'patience': 20,
            'metric': 'AUPRC',
            'inductive': False
        }
        data = Dataset(dataset_name)
        data.split(args.semi_supervised, 0)
        for t in range(args.trials):
            print("Dataset {}, Model {}, Trial {}, Time Cost {:.2f}".format(dataset_name, model, t, time_cost))
            if time_cost > 7200:  # stop searching this dataset after 2 hours (use 86400 for 1 day)
                break
            try:
                # Draw one random hyperparameter configuration and train a fresh detector.
                model_config = sample_param(model, dataset_name, t)
                detector = model_detector_dict[model](train_config, model_config, data)
                train_config['seed'] = seed_list[t]
                st = time.time()
                print("model_config: ", model_config)
                test_score = detector.train()
                # Keep the test scores of the configuration with the best validation score.
                if detector.best_score > best_val_score:
                    print("****current best score****")
                    best_val_score = detector.best_score
                    best_model_config = deepcopy(model_config)
                    best_troc, best_tprc, best_treck = test_score['AUROC'], test_score['AUPRC'], test_score['RecK']
                ed = time.time()
                time_cost += ed - st
                print("Current Val Best:{:.4f}; Test AUC:{:.4f}, PRC:{:.4f}, RECK:{:.4f}".format(
                    detector.best_score, test_score['AUROC'], test_score['AUPRC'], test_score['RecK']))
            except Exception:
                # Skip configurations that fail (e.g. CUDA OOM) and continue the search.
                torch.cuda.empty_cache()
        print("best_model_config:", best_model_config)
        best_model_configs[model][dataset_name] = deepcopy(best_model_config)
        model_result[dataset_name+'-AUROC'] = best_troc
        model_result[dataset_name+'-AUPRC'] = best_tprc
        model_result[dataset_name+'-RecK'] = best_treck
        model_result[dataset_name+'-Time'] = time_cost/(t+1)
    model_result = pandas.DataFrame(model_result, index=[0])
    results = pandas.concat([results, model_result])
    file_id = save_results(results, file_id)
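# For reference, a minimal sketch of the hyperparameter sampler this script relies
# on. The real sample_param comes from utils and its search space is model-specific;
# the hyperparameter names and value ranges below are assumptions for illustration only:
#
#   def sample_param(model, dataset_name, t):
#       """Return one randomly drawn hyperparameter configuration for `model`."""
#       random.seed(seed_list[t])  # tie the draw to the trial index for reproducibility
#       return {
#           'h_feats': random.choice([16, 32, 64]),
#           'lr': random.choice([1e-2, 5e-3, 1e-3]),
#           'drop_rate': random.choice([0.0, 0.2, 0.5]),
#       }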