# gpv.yaml
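#
# Keys written as ${...} are OmegaConf interpolations that Hydra resolves at
# load time, so e.g. ${training.num_workers} always mirrors the value of
# training.num_workers below. Any key can also be overridden on the command
# line with Hydra's key=value syntax; a sketch of a launch (the script name
# is a placeholder, not taken from this file):
#
#   python train.py exp_name=my_exp \
#       output_dir=$HOME/Data/gpv_output_dir \
#       data_dir=$HOME/Data/gpv_data_dir \
#       training.batch_size=60
#
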
exp_name: default
output_dir: path_to_output_dir # e.g. $HOME/Data/gpv_output_dir
exp_dir: ${output_dir}/${exp_name}
tb_dir: ${exp_dir}/tb_logs
ckpt_dir: ${exp_dir}/ckpts
data_dir: path_to_data_dir # e.g. $HOME/Data/gpv_data_dir
gpu: 0
num_nodes: 1
ngpus_per_node: 4
world_size: null # computed dynamically as num_nodes*ngpus_per_node
rank: 0
workers: ${training.num_workers}
batch_size: ${training.batch_size}
dist_backend: nccl
dist_url: 'tcp://localhost:10001'
multiprocessing_distributed: True
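
# The keys above follow torch.distributed conventions: nccl is the standard
# backend for multi-GPU CUDA training, dist_url is the rendezvous address the
# processes connect to (with num_nodes > 1 the localhost URL would need to
# point at the master node), and world_size is filled in at runtime.
# hydra.run.dir below redirects Hydra's working directory for a run into the
# experiment directory instead of Hydra's default outputs/ folder.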
hydra:
  run:
    dir: ${output_dir}/${exp_name}
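
# The defaults list composes Hydra config groups: an entry like
# "task: coco_learning_tasks" merges in task/coco_learning_tasks.yaml (and
# likewise learning_datasets/vqa.yaml) from the config search path.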
defaults:
  - task: coco_learning_tasks
  - learning_datasets: vqa

model:
  pretr_detr: ${data_dir}/detr/detr_coco_sce.pth
  vocab: ${data_dir}/learning_phase_data/vocab/vocab.json
  vocab_embed: ${data_dir}/learning_phase_data/vocab/vocab_embed.npy
  max_pos_enc_len: 30
  max_text_len: 20
  answer_head: null
  answering_type: generation
  hidden_dim: 768
  roi_head: True
  relevance_conditioning: True
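  # The detr sub-config mirrors the constructor arguments of the original
  # DETR detector (github.com/facebookresearch/detr): 100 learned object
  # queries over a ResNet-50 backbone with a 6-layer encoder and 6-layer
  # decoder of width 256. num_classes: 1 suggests a single relevance class
  # rather than COCO's 80 categories (an inference from the value, not
  # stated in this file).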
  detr:
    num_queries: 100
    num_classes: 1
    hidden_dim: 256
    nheads: 8
    num_encoder_layers: 6
    num_decoder_layers: 6
    backbone: resnet50
    lr_backbone: ${training.lr_backbone}
    position_embedding: sine
    masks: False
    dilation: False
    dropout: 0.1
    dim_feedforward: 2048
    pre_norm: False
    aux_loss: False
    frozenbatchnorm: True
    last_layer_only: True
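  # The joiners project both streams to the shared hidden_dim (768).
  # detr_dim: 2304 plausibly corresponds to a 2048-d ResNet-50 region feature
  # concatenated with the 256-d DETR query embedding; that breakdown is an
  # inference from the numbers, not stated in this file.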
  detr_joiner:
    detr_dim: 2304
    out_dim: ${model.hidden_dim}
  bert_joiner:
    bert_dim: 768
    out_dim: ${model.hidden_dim}
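  # text_decoder is the generator implied by answering_type: generation
  # above: a small 3-layer transformer decoder that reuses the model-wide
  # hidden size, head count, and dropout.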
  text_decoder:
    hidden_dim: ${model.hidden_dim}
    dropout: ${model.detr.dropout}
    nheads: ${model.detr.nheads}
    pos_enc: False
    num_layers: 3
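  # The bi_*/v_* split in co_att reads like ViLBERT-style co-attention, with
  # separate vision (v_*) and language streams tied together by
  # bi_num_attention_heads heads; this interpretation is inferred from the
  # key names.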
  co_att:
    visualization: False
    bi_num_attention_heads: 16
    bi_hidden_size: ${model.hidden_dim}
    hidden_size: ${model.hidden_dim}
    intermediate_size: 3072
    output_size: ${model.hidden_dim}
    attention_probs_dropout_prob: ${model.detr.dropout}
    hidden_dropout_prob: ${model.detr.dropout}
    hidden_act: gelu
    v_hidden_size: ${model.hidden_dim}
    v_intermediate_size: 3072
    v_output_size: ${model.hidden_dim}
    v_attention_probs_dropout_prob: ${model.detr.dropout}
    v_hidden_dropout_prob: ${model.detr.dropout}
    v_hidden_act: gelu
    num_layers: 3
  losses: ${losses}
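
# Each entry below names a criterion plus its loss weights; which criteria
# are active presumably depends on the tasks composed via the defaults list.
# The Localization matching costs and loss weights (ce/bbox/giou = 1/5/2,
# eos_coef 0.1) match the Hungarian-matching defaults of DETR.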
losses:
  CaptionLoss:
    name: caption_criterion
    pad_idx: null
    loss_wts:
      loss_caption: 5e-2
  VqaLoss:
    name: vqa_criterion
    pad_idx: null
    loss_wts:
      loss_vqa: 1
  ClsLoss:
    name: cls_criterion
    pad_idx: null
    loss_wts:
      loss_cls: 1
  Localization:
    name: localization_criterion
    cost_wts:
      ce: 1
      bbox: 5
      giou: 2
    loss_wts:
      loss_ce: 1
      loss_bbox: 5
      loss_giou: 2
    eos_coef: 0.1
    num_classes: ${model.detr.num_classes}
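
# A worked example of the schedule as configured: if lr_milestones and
# lr_drop drive a MultiStepLR-style decay (an assumption from the key names),
# the learning rate starts at 1e-4 and is halved at epochs 10, 15, 20, 25,
# 30, and 35, ending at 1e-4 * 0.5^6 ~ 1.6e-6; warmup covers the first 10%
# of training (lr_warmup_fraction). How lr_linear_decay combines with the
# milestones is not specified in this file.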
training:
  ckpt: null # ${ckpt_dir}/model.pth
  freeze: False # freeze DETR layers
  frozen_epochs: 10
  frozen_batch_size: 120
  num_epochs: 40 # will be set to frozen_epochs if freeze is True
  batch_size: 120 # will be set to frozen_batch_size if freeze is True
  num_workers: 30
  vis_step: 2000
  log_step: 10
  ckpt_step: 2000
  lr: 1e-4
  lr_backbone: 1e-5
  weight_decay: 1e-4
  lr_milestones:
    - 10
    - 15
    - 20
    - 25
    - 30
    - 35
  lr_drop: 0.5
  lr_warmup: True
  lr_linear_decay: True
  lr_warmup_fraction: 0.1
  clip_max_norm: 0.1
  run_vis_at_launch: True
  num_vis_samples: 15
  run_eval_at_launch: True
  num_val_samples:
    coco_vqa: 20000
    coco_cap: 10000
    coco_det: 10000
    coco_cls: 10000
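
# Evaluation defaults: task: CocoVqa plausibly corresponds to the coco_vqa
# entry under num_val_samples above, and ckpt points at the checkpoint that
# training writes under ckpt_dir. Set num_eval_batches to a small integer for
# a quick smoke test instead of a full pass over the val subset.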
eval:
  task: CocoVqa
  ckpt: ${exp_dir}/ckpts/model.pth
  batch_size: 20
  num_workers: 20
  subset: val
  predict: True
  num_eval_batches: null # set to null to evaluate on full dataset