Skip to content

Commit

Permalink
for final
Browse files Browse the repository at this point in the history
  • Loading branch information
MintYiqingchen committed May 7, 2018
1 parent f287554 commit c65ec22
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 9 deletions.
17 changes: 13 additions & 4 deletions script/convert_msmarco.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import nltk
import re
import bisect
import argparse

def smith_waterman(tt,bb):
# adapted from https://gist.github.com/radaniba/11019717
Expand Down Expand Up @@ -125,13 +126,13 @@ def trim_empty(tokens):
return [t for t in tokens if t != '']

def convert(file, outfile, is_test):
with gzip.open(file,'rb') as f:
with open(file,'r', encoding='utf8') as f:
with open(outfile, 'w', encoding='utf-8') as out:
for i,line in enumerate(f):
j = json.loads(line.decode('utf-8'))
j = json.loads(line)
p = j['passages']

if j['query_type'] == 'description':
if j['query_type'].lower() == 'description':
context = preprocess(' '.join([pp['passage_text'] for pp in p]))
ctokens = trim_empty(tokenize(context, context_mode=True))
normalized_context = ' '.join(ctokens)
Expand Down Expand Up @@ -168,7 +169,15 @@ def convert(file, outfile, is_test):
else:
output = [str(j['query_id']), j['query_type'], ' '.join(nctokens),' '.join(qtokens)]
out.write("%s\n"%'\t'.join(output))

'''
convert('train_v1.1.json.gz', 'train.tsv', False)
convert('dev_v1.1.json.gz', 'dev.tsv', False)
convert('test_public_v1.1.json.gz', 'test.tsv', True)
'''
if __name__ == '__main__':
parser=argparse.ArgumentParser()
parser.add_argument('source',help='source json file')
parser.add_argument('dest', help='destination tsv file')
parser.add_argument('--test',help='if is test dataset',action='store_true')
args = parser.parse_args()
convert(args.source, args.dest, args.test)
10 changes: 6 additions & 4 deletions script/polymath.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,13 +474,14 @@ def input_layer(self,cgw,cnw,qgw,qnw):
'input_layer',
'input_layer')
def self_attention_layer(self, context):
    """Self-attention over the context sequence.

    Projects `context` down to hidden_dim, runs a bidirectional RNN over it,
    applies dot-product self-attention to the RNN output, and returns a
    hidden_dim-wide dense transform of the attended sequence.

    Args:
        context: sequence tensor from the preceding attention layer.

    Returns:
        A hidden_dim-wide transformed sequence.
    """
    # NOTE(review): the scraped diff interleaved the pre- and post-commit
    # lines; the superseded 2*hidden_dim variants have been dropped here.
    dense = C.layers.Dense(self.hidden_dim, activation=C.relu)
    rnn = OptimizedRnnStack(self.hidden_dim, bidirectional=True, use_cudnn=self.use_cudnn)
    context1 = dense(context)
    process_context = rnn(context1)
    # residual attention
    att_context, wei = self.dot_attention(process_context, process_context,
                                          self.hidden_dim).outputs
    dense2 = C.layers.Dense(self.hidden_dim, activation=C.relu)(att_context)
    # NOTE(review): `res` (the residual sum) is computed but `dense2` is
    # returned, so the residual connection is dead — confirm whether
    # `return res` was intended before changing behavior.
    res = dense2 + context1
    return dense2
def build_model(self):
Expand All @@ -505,7 +506,8 @@ def build_model(self):
c_enhance = C.splice(c_processed, c_elmo, df)
q_enhance = C.splice(q_processed, q_elmo, qf)
att_context, wei = self.attention_layer(c_enhance, q_enhance,
dimc= 2*self.hidden_dim+1027, dimq=2*self.hidden_dim+1025, common_dim=2*self.hidden_dim+1024).outputs
dimc= 2*self.hidden_dim+1027, dimq=2*self.hidden_dim+1025,\
common_dim=2*self.hidden_dim+1024).outputs
self_context = self.self_attention_layer(att_context) # 2*hidden_dim
# modeling layer
mod_context = self.modeling_layer(self_context)
Expand Down
2 changes: 1 addition & 1 deletion script/rnetmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def build_model(self):
input_phs = {'cgw':cgw, 'cnw':cnw, 'qgw':qgw, 'qnw':qnw,
'cc':cc, 'qc':qc, 'ab':ab, 'ae':ae}
self._input_phs = input_phs
seif.info['query'] = C.splice(qgw, qnw)
self.info['query'] = C.splice(qgw, qnw)
self.info['doc'] = C.splice(cgw, gnw)
# graph
pu, qu = self.input_layer(cgw, cnw, cc, qgw, qnw, qc).outputs
Expand Down

0 comments on commit c65ec22

Please sign in to comment.