generate_jobs.py
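"""Generate PBS job scripts for an LSTM hyperparameter sweep.

For each hyperparameter configuration, main() writes two shell scripts into
../jobs/: a training job ({name}.sh) and an LSTM-state dump job
(dump_{name}.sh), where {name} encodes the swept hyperparameter values.
"""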
import os
head="""
#!/bin/bash
#PBS -q isi
#PBS -l walltime=10:00:00
#PBS -l nodes=1:ppn=16:gpus=1:shared
ROOT_DIR=/home/xw/codeRepository/NLPspace/QA/jira_issue_clstm_multi_clf
PY=$ROOT_DIR/train.py
MODEL_DIR=$ROOT_DIR/model/__id__
DATA_DIR=$ROOT_DIR/data/__data_dir__/
TRAIN_PATH=$DATA_DIR/train
DEV_PATH=$DATA_DIR/valid
TEST_PATH=$DATA_DIR/test
DATA_FILE='/home/xw/codeRepository/NLPspace/QA/jira_issue_rcnn_multi_clf/data/SearchRequest_All.xml'
source /home/xw/xingshi/sh/init_tensorflow.sh
GPU_ID=0
if [ $1 ]
then
GPU_ID=$0;
fi
export CUDA_VISIBLE_DEVICES=$GPU_ID
__cmd__
"""
train_cmd = "python $PY --mode TRAIN --train_path $TRAIN_PATH --dev_path $DEV_PATH --model_dir $MODEL_DIR "
dump_cmd = "python $PY --mode DUMP_LSTM --test_path $TEST_PATH --model_dir $MODEL_DIR "
# This reassignment supersedes the TRAIN command above. --data_file=$DATA_FILE
# is an assumption: the original left the value empty, and $DATA_FILE is the
# only data-file variable defined in the template.
train_cmd = "python3 $PY --data_file=$DATA_FILE " \
            "--clf=clstm --language=ch --num_classes=118 --vocab_size=20000 --num_epochs=30 "
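# For a ptb configuration (size 300, dropout 0.5, lr 0.5, 1 layer), the
# composed training command becomes roughly:
#   python3 $PY --data_file=$DATA_FILE --clf=clstm --language=ch
#       --num_classes=118 --vocab_size=20000 --num_epochs=30 --n_bucket 10
#       --batch_size 64 --size 300 --keep_prob 0.5 --learning_rate 0.5
#       --n_epoch 40 --num_layers 1 --L 110 --vocab_size 10050
# Note that --vocab_size appears twice: once from the base command and once
# from the sweep template; train.py's flag parser decides which value wins.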
def main(acct=0):
    # Each helper maps one hyperparameter value to a pair:
    # (fragment appended to the job name, CLI flag passed to train.py).
    def name(val):
        return val, ""
    def model_dir(val):
        return "", "--model_dir {}".format(val)
    def batch_size(val):
        return "", "--batch_size {}".format(val)
    def size(val):
        return "h{}".format(val), "--size {}".format(val)
    def dropout(val):
        return "d{}".format(val), "--keep_prob {}".format(val)
    def learning_rate(val):
        return "l{}".format(val), "--learning_rate {}".format(val)
    def n_epoch(val):
        return "", "--n_epoch {}".format(val)
    def num_layers(val):
        return "n{}".format(val), "--num_layers {}".format(val)
    def L(val):
        return "", "--L {}".format(val)
    def vocab_size(val):
        return "", "--vocab_size {}".format(val)
    def n_bucket(val):
        return "", "--n_bucket {}".format(val)
    funcs = [name, n_bucket, batch_size,    # 0-2
             size, dropout, learning_rate,  # 3-5
             n_epoch, num_layers, L,        # 6-8
             vocab_size]                    # 9
    # Baseline hyperparameters, positionally aligned with funcs above.
    template = ["ptb", 10, 64,  # name, n_bucket, batch_size
                300, 0.5, 1.0,  # size, dropout, learning_rate
                40, 2, 110,     # n_epoch, num_layers, L
                10050]          # vocab_size
    params = []
    # Sweep for ptb: size x num_layers x dropout x learning_rate.
    _sizes = [300, 600, 329]
    _num_layers = [[1, 2, 3], [1, 2, 3], [1]]
    _dropouts = [0.5, 1.0]
    _learning_rates = [0.5, 1.0]
    for i, _size in enumerate(_sizes):
        ns = _num_layers[i]
        for _n in ns:
            for _d in _dropouts:
                for _l in _learning_rates:
                    temp = list(template)
                    temp[0] = 'ptb'
                    temp[3] = _size
                    temp[7] = _n
                    temp[4] = _d
                    temp[5] = _l
                    params.append(temp)
    # Sweep for ptbchar (character-level PTB).
    template = ["ptbchar", 10, 32,  # name, n_bucket, batch_size
                300, 0.8, 1.0,      # size, dropout, learning_rate
                40, 2, 300,         # n_epoch, num_layers, L
                60]                 # vocab_size
    _learning_rates = [0.5, 1.0]
    _sizes = [300, 600, 422]
    _num_layers = [[1, 2, 3], [1, 2, 3], [1]]
    _dropouts = [0.8, 1.0]
    for i, _size in enumerate(_sizes):
        ns = _num_layers[i]
        for _n in ns:
            for _d in _dropouts:
                for _l in _learning_rates:
                    temp = list(template)
                    temp[0] = 'ptbchar'
                    temp[3] = _size
                    temp[7] = _n
                    temp[4] = _d
                    temp[5] = _l
                    params.append(temp)
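    # params now holds 7 (size, layers) combos x 2 dropouts x 2 learning
    # rates = 28 configurations per corpus, 56 in total; the loop below
    # therefore writes 112 job scripts.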
    def get_name_cmd(paras):
        # Build the job name and the full flag string for one configuration.
        name = ""
        cmd = []
        for func, para in zip(funcs, paras):
            n, c = func(para)
            name += n
            cmd.append(c)
        name = name.replace(".", "")  # drop dots so values like 0.5 are filename-safe
        cmd = " ".join(cmd)
        return name, cmd
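    # Example: a ptb config with size 300, dropout 0.5, learning rate 0.5,
    # and 1 layer yields the name "ptbh300d05l05n1" and the flag string
    # "--n_bucket 10 --batch_size 64 --size 300 --keep_prob 0.5 ...".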
    def get_dump_cmd(paras):
        # The dump job only needs n_bucket, size, num_layers, and L.
        cmd = []
        for i in [1, 3, 7, 8]:
            func = funcs[i]
            para = paras[i]
            n, c = func(para)
            cmd.append(c)
        return " ".join(cmd)
    # Write one training script and one matching dump script per configuration.
    os.makedirs("../jobs", exist_ok=True)  # make sure the output directory exists
    for para in params:
        job_name, cmd = get_name_cmd(para)
        dp_cmd = get_dump_cmd(para)
        cmd = train_cmd + cmd
        dp_cmd = dump_cmd + dp_cmd
        # training job
        fn = "../jobs/{}.sh".format(job_name)
        with open(fn, 'w') as f:
            content = head.replace("__cmd__", cmd)
            content = content.replace("__id__", job_name)
            content = content.replace("__data_dir__", para[0])
            f.write(content)
        # dump job
        fn = "../jobs/dump_{}.sh".format(job_name)
        with open(fn, 'w') as f:
            content = head.replace("__cmd__", dp_cmd)
            content = content.replace("__id__", job_name)
            content = content.replace("__data_dir__", para[0])
            f.write(content)
if __name__ == "__main__":
    main()
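# Typical workflow (the job name below is illustrative; actual names are
# built from the swept hyperparameters by get_name_cmd):
#
#   python generate_jobs.py
#   qsub ../jobs/ptbh300d05l05n1.sh        # submit a training job
#   qsub ../jobs/dump_ptbh300d05l05n1.sh   # submit the matching dump job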