import numpy as np
import time
from sklearn.model_selection import train_test_split
from spektral.utils.convolution import normalized_adjacency
from spektral.datasets.tud import load_data
from modules.models import RGNN_classifier
from modules.pooling import preprocess
import pickle
np.random.seed(0)
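# Pipeline overview (summary of the script below): load a graph-classification
# dataset (synthetic or TUD), build normalized adjacency matrices (optionally
# coarsened for hierarchical pooling), then train and evaluate a
# reservoir-computing graph classifier (RGNN_classifier).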
######## HYPERPARAMS ########
P = {
    'MODE': 'pool',                   # Strategy to generate embeddings: {'deep', 'arma', 'pool'}
    'T': 3,                           # Number of stacked layers
    'K': 1,                           # Number of parallel gESN in each module
    'n_internal_units': 50,           # Number of reservoir units in each gESN
    'spectral_radius': .5,            # Reservoir spectral radius (if None, provide max_norm)
    'max_norm': None,                 # Reservoir max L2 norm (if None, provide spectral_radius)
    'connectivity': 1,                # Connectivity of the reservoir. If <1, the reservoir weights matrix is sparse
    'in_scaling': .5,                 # Scaling of the input weights in the first layer
    'hid_scaling': .8,                # Scaling of the input weights in the hidden layers
    'input_weights_mode': 'uniform',  # Options: {'very_sparse', 'sparse_uniform', 'binomial', 'uniform'}
    'return_last': False,             # Use only the output of the last layer to compute the embeddings
    'aggregation': 'sum',             # Global pooling aggregation. Options: {'sum', 'average'}
    'readout': False,                 # Use a nonlinear readout if True
    'readout_units_factor': 3,        # Multiplicative factor for the number of units in the ELM readout
    'alpha': 1e0,                     # Ridge regression regularization
    'pool': 'decim',                  # Pooling strategy (ignored if MODE != 'pool'). Options: {'graclus', 'nmf', 'decim'}
    'coarse_level': [0, 1, 2],        # Defines the pooling levels (only if MODE == 'pool'). It should be consistent with T
    'dataset_ID': 'synth',            # TUD dataset IDs or "synth"
}
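# Example: with T = 3 stacked layers, coarse_level = [0, 1, 2] provides one
# coarsening level per layer (our reading of the "consistent with T" comment
# above; adjust both together when changing the depth).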
print(P)
######## LOAD DATA ########
PRECOMPUTED = False
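# PRECOMPUTED toggles loading the cached coarsened Laplacians from disk (the
# .pkl file written further below) instead of recomputing them on every run.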
if P['dataset_ID'] == 'synth':
    loaded = np.load('data/easy.npz', allow_pickle=True)
    X_train, A_train, y_train = loaded['tr_feat'], list(loaded['tr_adj']), loaded['tr_class']
    X_test, A_test, y_test = loaded['te_feat'], list(loaded['te_adj']), loaded['te_class']
    # Convert one-hot labels to integer class indices
    y_train = np.argmax(y_train, axis=1)
    y_test = np.argmax(y_test, axis=1)
else:
    A, X, y = load_data(P['dataset_ID'], normalize_features=True, clean=False)
    y = np.argmax(y, axis=-1)
    # Stratified 90/10 train/test split
    A_train, A_test, \
    X_train, X_test, \
    y_train, y_test = train_test_split(A, X, y, test_size=0.1, stratify=y)
if P['MODE'] == 'pool':
    if PRECOMPUTED:
        with open('data/' + P['dataset_ID'] + "_pool_" + P['pool'] + ".pkl", "rb") as f:
            L_train, L_test, D_train, D_test, X_train, X_test, y_train, y_test = pickle.load(f)
    else:
        print('Computing coarsened Laplacians')
        A_train, X_train, D_train = preprocess(A_train, X_train, coarsening_levels=P['coarse_level'], pool=P['pool'])
        A_test, X_test, D_test = preprocess(A_test, X_test, coarsening_levels=P['coarse_level'], pool=P['pool'])
        L_train = [[normalized_adjacency(a_).astype(np.float32) for a_ in A] for A in A_train]
        L_test = [[normalized_adjacency(a_).astype(np.float32) for a_ in A] for A in A_test]
        # Cache the preprocessed data for future runs
        with open('data/' + P['dataset_ID'] + "_pool_" + P['pool'] + ".pkl", "wb") as f:
            pickle.dump([L_train, L_test, D_train, D_test, X_train, X_test, y_train, y_test], f)
else:
    L_train = [normalized_adjacency(A_) for A_ in A_train]
    L_test = [normalized_adjacency(A_) for A_ in A_test]
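# Note: normalized_adjacency (from spektral.utils.convolution) returns the
# symmetrically normalized adjacency D^{-1/2} A D^{-1/2}. A minimal sanity
# sketch on a toy 3-node path graph (safe to uncomment; names are illustrative):
#   A_toy = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]], dtype=np.float32)
#   L_toy = normalized_adjacency(A_toy)  # off-diagonal entries ~ 1/sqrt(2) ~ 0.707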
# Dictionary that specifies the gESN config
gESN_dict = {
    'n_internal_units': P['n_internal_units'],
    'spectral_radius': P['spectral_radius'],
    'max_norm': P['max_norm'],
    'leak': None,
    'connectivity': P['connectivity'],
    'noise_level': 0.0,
    'circle': False,
    'input_weights_mode': P['input_weights_mode'],
}
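# Background note: in echo state networks the recurrent weights are typically
# rescaled so that their spectral radius stays below 1, a common heuristic for
# the echo state property; per the HYPERPARAMS comments, either
# 'spectral_radius' or 'max_norm' (but not both) controls that rescaling here.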
# Initialize the RC model (embedding generator + classifier)
m = RGNN_classifier(embedding_model=P['MODE'],
                    K=P['K'],
                    T=P['T'],
                    in_scaling=P['in_scaling'],
                    hid_scaling=P['hid_scaling'],
                    return_last=P['return_last'],
                    aggregation=P['aggregation'],
                    alpha_ridge=P['alpha'],
                    readout=P['readout'],
                    readout_units_factor=P['readout_units_factor'],
                    **gESN_dict)
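# As used below, m.fit(...) trains the readout and returns the training score,
# while m.transform(...) evaluates on held-out data; both are project-local
# APIs of RGNN_classifier (modules.models), so check that class for details.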
start_time = time.time()
if P['MODE'] == 'pool':
    tr_score = m.fit(y_train, L_train, X_train, D_train)
    test_score = m.transform(y_test, L_test, X_test, D_test)
else:
    tr_score = m.fit(y_train, L_train, X_train)
    test_score = m.transform(y_test, L_test, X_test)
end_time = time.time() - start_time
print('tr_score: {:.3f}, test_score: {:.3f}, time: {:.1f}'.format(tr_score, test_score, end_time))
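# Usage sketch: to benchmark on a TU Dortmund dataset instead of the synthetic
# one, set e.g. P['dataset_ID'] = 'MUTAG' (assumption: any dataset ID accepted
# by spektral.datasets.tud.load_data works here, as the HYPERPARAMS comment
# suggests).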