From 944b8c092650e63327854a32bcb6509d2163a4b1 Mon Sep 17 00:00:00 2001
From: hanhxiao <hanhxiao@tencent.com>
Date: Wed, 21 Aug 2019 15:59:59 +0800
Subject: [PATCH] fix(ci): fix unit tests for modules

---
 README.md                                  |  2 +-
 gnes/preprocessor/__init__.py              |  2 +-
 tests/contrib/fake_faiss.py                |  1 -
 tests/contrib/fake_faiss2.py               |  1 -
 tests/test_annoyindexer.py                 |  2 --
 tests/test_audio_preprocessor.py           |  2 +-
 tests/test_bindexer.py                     |  2 +-
 tests/test_dict_indexer.py                 |  2 --
 tests/test_gif.py                          |  2 +-
 tests/test_gpt_encoder.py                  |  2 +-
 tests/test_hash_encoder.py                 |  4 ++--
 tests/test_hash_indexer.py                 |  6 ++++--
 tests/test_image_encoder.py                | 14 +++++++-------
 tests/test_mfcc_encoder.py                 |  2 +-
 tests/test_onnx_image_encoder.py           |  2 ++
 tests/test_pytorch_transformers_encoder.py |  1 +
 tests/test_router.py                       |  2 +-
 tests/test_video_preprocessor.py           |  2 +-
 tests/yaml/preprocessor1.yml               |  2 +-
 tests/yaml/router-chunk-reduce.yml         |  2 +-
 tests/yaml/test-preprocessor.yml           |  2 +-
 tutorials/component-yaml-spec.md           |  7 +++----
 yaml-example/component/preprocessor.yml    |  2 +-
 23 files changed, 32 insertions(+), 34 deletions(-)
diff --git a/README.md b/README.md
index fb2c8eca..bcbc1c53 100644
--- a/README.md
+++ b/README.md
@@ -278,7 +278,7 @@ Now let's see what the YAML config says. First impression, it is pretty intuitiv
  <summary>Preprocessor config: text-prep.yml (click to expand...)</summary>
  
 ```yaml
-!TextPreprocessor
+!PunctSplitPreprocessor
 parameters:
   start_doc_id: 0
   random_doc_id: True
diff --git a/gnes/preprocessor/__init__.py b/gnes/preprocessor/__init__.py
index c3a75c8c..689d251d 100644
--- a/gnes/preprocessor/__init__.py
+++ b/gnes/preprocessor/__init__.py
@@ -33,7 +33,7 @@
     'FFmpegVideoSegmentor': 'video.ffmpeg',
     'ShotDetectPreprocessor': 'video.shotdetect',
     'AudioVanilla': 'audio.audio_vanilla',
-    'BaseAudioPreprocessor': 'base'
+    'BaseAudioPreprocessor': 'base',
     'RawChunkPreprocessor': 'base'
 }
 
diff --git a/tests/contrib/fake_faiss.py b/tests/contrib/fake_faiss.py
index 06108c24..7429d37e 100644
--- a/tests/contrib/fake_faiss.py
+++ b/tests/contrib/fake_faiss.py
@@ -8,4 +8,3 @@ def __init__(self, bar: int, *args, **kwargs):
         self.is_trained = True
         self.bar = bar
         self.logger.info('look at me, I override the original GNES faiss indexer')
-
diff --git a/tests/contrib/fake_faiss2.py b/tests/contrib/fake_faiss2.py
index 68003011..4a19a493 100644
--- a/tests/contrib/fake_faiss2.py
+++ b/tests/contrib/fake_faiss2.py
@@ -8,4 +8,3 @@ def __init__(self, bar: int, *args, **kwargs):
         self.is_trained = True
         self.bar = bar
         self.logger.info('look at me, I override the overrided faiss indexer!!!')
-
diff --git a/tests/test_annoyindexer.py b/tests/test_annoyindexer.py
index 014cadf7..17252ec1 100644
--- a/tests/test_annoyindexer.py
+++ b/tests/test_annoyindexer.py
@@ -1,10 +1,8 @@
 import os
-import shutil
 import unittest
 
 import numpy as np
 
-from gnes.helper import touch_dir
 from gnes.indexer.vector.annoy import AnnoyIndexer
 
 
diff --git a/tests/test_audio_preprocessor.py b/tests/test_audio_preprocessor.py
index f941ac1f..8bde495f 100644
--- a/tests/test_audio_preprocessor.py
+++ b/tests/test_audio_preprocessor.py
@@ -43,4 +43,4 @@ def test_video_preprocessor_service_realdata(self):
                     self.assertGreater(len(d.chunks), 0)
                     for _ in range(len(d.chunks)):
                         shape = blob2array(d.chunks[_].blob).shape
-                        self.assertEqual(len(shape), 1)
\ No newline at end of file
+                        self.assertEqual(len(shape), 1)
diff --git a/tests/test_bindexer.py b/tests/test_bindexer.py
index def921e0..a62c8722 100644
--- a/tests/test_bindexer.py
+++ b/tests/test_bindexer.py
@@ -22,7 +22,7 @@ def setUp(self):
                                    [3, 2, 1, 2]]).astype(np.uint8)
 
         self.toy_exp = [[(234, 0, 1., 1,), (123, 1, 1., 1)], [(432, 0, 1., 1), (1, 0, 1., 1)],
-                         [(234, 0, 1., 0.75), (123, 1, 1., 0.75)]]
+                        [(234, 0, 1., 0.75), (123, 1, 1., 0.75)]]
         self.weights = [1.] * len(self.toy_label)
 
         dirname = os.path.dirname(__file__)
diff --git a/tests/test_dict_indexer.py b/tests/test_dict_indexer.py
index 851a4d68..457156bc 100644
--- a/tests/test_dict_indexer.py
+++ b/tests/test_dict_indexer.py
@@ -48,5 +48,3 @@ def test_query_docs(self):
         res = self.db.query(query_list)
         num_non_empty = sum(1 for d in res if d)
         self.assertEqual(num_non_empty, 1)
-
-
diff --git a/tests/test_gif.py b/tests/test_gif.py
index 9617be61..9267de5e 100644
--- a/tests/test_gif.py
+++ b/tests/test_gif.py
@@ -1,10 +1,10 @@
+import copy
 import os
 import unittest
 
 from gnes.preprocessor.base import BasePreprocessor
 from gnes.preprocessor.video.ffmpeg import FFmpegVideoSegmentor
 from gnes.proto import gnes_pb2
-import copy
 
 
 class TestPartition(unittest.TestCase):
diff --git a/tests/test_gpt_encoder.py b/tests/test_gpt_encoder.py
index 26031dbc..76d96380 100644
--- a/tests/test_gpt_encoder.py
+++ b/tests/test_gpt_encoder.py
@@ -39,4 +39,4 @@ def test_dump_load(self):
 
     def tearDown(self):
         if os.path.exists(self.dump_path):
-            os.remove(self.dump_path)
\ No newline at end of file
+            os.remove(self.dump_path)
diff --git a/tests/test_hash_encoder.py b/tests/test_hash_encoder.py
index 86a36cec..36e0e2ee 100644
--- a/tests/test_hash_encoder.py
+++ b/tests/test_hash_encoder.py
@@ -31,7 +31,7 @@ def test_train_pred(self):
 
         out = m.encode(self.test_data)
         self.assertEqual(self.x, out.shape[0])
-        self.assertEqual(self.num_idx+self.num_bytes, out.shape[1])
+        self.assertEqual(self.num_idx + self.num_bytes, out.shape[1])
         self.assertEqual(np.uint32, out.dtype)
 
     def test_yaml_load(self):
@@ -39,4 +39,4 @@ def test_yaml_load(self):
         pca_hash.train(self.test_data)
         out = pca_hash.encode(self.test_data)
         self.assertEqual(self.x, out.shape[0])
-        self.assertEqual(self.num_idx+self.num_bytes, out.shape[1])
+        self.assertEqual(self.num_idx + self.num_bytes, out.shape[1])
diff --git a/tests/test_hash_indexer.py b/tests/test_hash_indexer.py
index efb36308..6c4ec711 100644
--- a/tests/test_hash_indexer.py
+++ b/tests/test_hash_indexer.py
@@ -1,8 +1,10 @@
 import os
 import unittest
+
 import numpy as np
+
 from gnes.indexer.vector.hbindexer import HBIndexer
-import shutil
+
 
 class TestMHIndexer(unittest.TestCase):
 
@@ -13,7 +15,7 @@ def setUp(self):
         self.n = 100
 
         self.test_label = [(_, 1) for _ in range(self.n)]
-        t = np.random.randint(0, 100, size=[self.n, self.n_idx+self.num_bytes])
+        t = np.random.randint(0, 100, size=[self.n, self.n_idx + self.num_bytes])
         self.test_data = t.astype(np.uint32)
         self.weights = [1.] * len(self.test_label)
         self.data_path = 'test_path'
diff --git a/tests/test_image_encoder.py b/tests/test_image_encoder.py
index 07f60501..d1013ac8 100644
--- a/tests/test_image_encoder.py
+++ b/tests/test_image_encoder.py
@@ -3,7 +3,7 @@
 import unittest
 import zipfile
 
-from gnes.encoder.image.base import BasePytorchEncoder
+from gnes.encoder.base import BaseEncoder
 from gnes.preprocessor.base import UnaryPreprocessor, PipelinePreprocessor
 from gnes.preprocessor.image.resize import ResizeChunkPreprocessor
 from gnes.preprocessor.image.sliding_window import VanillaSlidingPreprocessor
@@ -45,7 +45,7 @@ def setUp(self):
         self.mobilenet_yaml = os.path.join(dirname, 'yaml', 'mobilenet-encoder.yml')
 
     def test_vgg_encoding(self):
-        self.encoder = BasePytorchEncoder.load_yaml(self.vgg_yaml)
+        self.encoder = BaseEncoder.load_yaml(self.vgg_yaml)
         for test_img in self.test_img:
             vec = self.encoder.encode(test_img)
             print("the length of data now is:", len(test_img))
@@ -53,7 +53,7 @@ def test_vgg_encoding(self):
             self.assertEqual(vec.shape[1], 4096)
 
     def test_resnet_encoding(self):
-        self.encoder = BasePytorchEncoder.load_yaml(self.res_yaml)
+        self.encoder = BaseEncoder.load_yaml(self.res_yaml)
         for test_img in self.test_img:
             vec = self.encoder.encode(test_img)
             print("the length of data now is:", len(test_img))
@@ -61,7 +61,7 @@ def test_resnet_encoding(self):
             self.assertEqual(vec.shape[1], 2048)
 
     def test_inception_encoding(self):
-        self.encoder = BasePytorchEncoder.load_yaml(self.inception_yaml)
+        self.encoder = BaseEncoder.load_yaml(self.inception_yaml)
         for test_img in self.test_img:
             vec = self.encoder.encode(test_img)
             print("the length of data now is:", len(test_img))
@@ -69,7 +69,7 @@ def test_inception_encoding(self):
             self.assertEqual(vec.shape[1], 2048)
 
     def test_mobilenet_encoding(self):
-        self.encoder = BasePytorchEncoder.load_yaml(self.mobilenet_yaml)
+        self.encoder = BaseEncoder.load_yaml(self.mobilenet_yaml)
         for test_img in self.test_img:
             vec = self.encoder.encode(test_img)
             print("the length of data now is:", len(test_img))
@@ -77,11 +77,11 @@ def test_mobilenet_encoding(self):
             self.assertEqual(vec.shape[1], 1280)
 
     def test_dump_load(self):
-        self.encoder = BasePytorchEncoder.load_yaml(self.vgg_yaml)
+        self.encoder = BaseEncoder.load_yaml(self.vgg_yaml)
 
         self.encoder.dump(self.dump_path)
 
-        vgg_encoder2 = BasePytorchEncoder.load(self.dump_path)
+        vgg_encoder2 = BaseEncoder.load(self.dump_path)
 
         for test_img in self.test_img:
             vec = vgg_encoder2.encode(test_img)
diff --git a/tests/test_mfcc_encoder.py b/tests/test_mfcc_encoder.py
index 6cb97222..7210a147 100644
--- a/tests/test_mfcc_encoder.py
+++ b/tests/test_mfcc_encoder.py
@@ -31,4 +31,4 @@ def test_mfcc_encoding(self):
         vec = self.encoder.encode(self.audios)
         self.assertEqual(len(vec.shape), 2)
         self.assertEqual(vec.shape[0], len(self.audios))
-        self.assertEqual(vec.shape[1] % self.encoder.n_mfcc, 0)
\ No newline at end of file
+        self.assertEqual(vec.shape[1] % self.encoder.n_mfcc, 0)
diff --git a/tests/test_onnx_image_encoder.py b/tests/test_onnx_image_encoder.py
index fe501a70..f78d1eef 100644
--- a/tests/test_onnx_image_encoder.py
+++ b/tests/test_onnx_image_encoder.py
@@ -9,6 +9,7 @@
 from gnes.preprocessor.image.sliding_window import VanillaSlidingPreprocessor
 from gnes.proto import gnes_pb2, blob2array
 
+
 def img_process_for_test(dirname):
     zipfile_ = zipfile.ZipFile(os.path.join(dirname, 'imgs/test.zip'))
     all_bytes = [zipfile_.open(v).read() for v in zipfile_.namelist()]
@@ -31,6 +32,7 @@ def img_process_for_test(dirname):
                                           for img in test_img_copy for chunk in img.chunks])
     return test_img_all_preprocessor
 
+
 class TestONNXImageEncoder(unittest.TestCase):
 
     def setUp(self):
diff --git a/tests/test_pytorch_transformers_encoder.py b/tests/test_pytorch_transformers_encoder.py
index 271654fa..59c37b11 100644
--- a/tests/test_pytorch_transformers_encoder.py
+++ b/tests/test_pytorch_transformers_encoder.py
@@ -3,6 +3,7 @@
 
 from gnes.encoder.text.torch_transformers import TorchTransformersEncoder
 
+
 class TestTorchTransformersEncoder(unittest.TestCase):
 
     def setUp(self):
diff --git a/tests/test_router.py b/tests/test_router.py
index 3c1421d1..dbb3d823 100644
--- a/tests/test_router.py
+++ b/tests/test_router.py
@@ -17,7 +17,7 @@ def setUp(self):
         self.publish_router_yaml = '!PublishRouter {parameters: {num_part: 2}}'
         self.batch_router_yaml = '!DocBatchRouter {gnes_config: {batch_size: 2}}'
         self.reduce_router_yaml = 'BaseReduceRouter'
-        self.chunk_router_yaml = 'ChunkToDocumentRouter'
+        self.chunk_router_yaml = 'ChunkToDocRouter'
         self.chunk_sum_yaml = 'ChunkSumRouter'
         self.doc_router_yaml = 'DocFillRouter'
         self.doc_sum_yaml = 'DocSumRouter'
diff --git a/tests/test_video_preprocessor.py b/tests/test_video_preprocessor.py
index c53f1a17..bf0c0194 100644
--- a/tests/test_video_preprocessor.py
+++ b/tests/test_video_preprocessor.py
@@ -65,7 +65,7 @@ def test_video_cut_by_frame(self):
                 r = client.recv_message()
                 for d in r.request.index.docs:
                     self.assertGreater(len(d.chunks), 0)
-                    for _ in range(len(d.chunks)-1):
+                    for _ in range(len(d.chunks) - 1):
                         shape = blob2array(d.chunks[_].blob).shape
                         self.assertEqual(shape, (30, 168, 192, 3))
                     shape = blob2array(d.chunks[-1].blob).shape
diff --git a/tests/yaml/preprocessor1.yml b/tests/yaml/preprocessor1.yml
index f510a5e9..e9270bd6 100644
--- a/tests/yaml/preprocessor1.yml
+++ b/tests/yaml/preprocessor1.yml
@@ -1,4 +1,4 @@
-!TextPreprocessor
+!PunctSplitPreprocessor
 parameters:
   start_doc_id: 0
   random_doc_id: True
diff --git a/tests/yaml/router-chunk-reduce.yml b/tests/yaml/router-chunk-reduce.yml
index dd69d91a..f3eb4f3e 100644
--- a/tests/yaml/router-chunk-reduce.yml
+++ b/tests/yaml/router-chunk-reduce.yml
@@ -1 +1 @@
-!ChunkToDocumentRouter {}
\ No newline at end of file
+!ChunkToDocRouter {}
\ No newline at end of file
diff --git a/tests/yaml/test-preprocessor.yml b/tests/yaml/test-preprocessor.yml
index f510a5e9..e9270bd6 100644
--- a/tests/yaml/test-preprocessor.yml
+++ b/tests/yaml/test-preprocessor.yml
@@ -1,4 +1,4 @@
-!TextPreprocessor
+!PunctSplitPreprocessor
 parameters:
   start_doc_id: 0
   random_doc_id: True
diff --git a/tutorials/component-yaml-spec.md b/tutorials/component-yaml-spec.md
index b43e8a29..b6e656d4 100644
--- a/tutorials/component-yaml-spec.md
+++ b/tutorials/component-yaml-spec.md
@@ -65,10 +65,9 @@ In this example, we define a `BasePytorchEncoder` that loads a pretrained VGG16
 |`!CLS`| Component Type |
 |---|---|
 |`!BasePreprocessor`|Preprocessor|
-|`!TextPreprocessor`|Preprocessor|
+|`!PunctSplitPreprocessor`|Preprocessor|
 |`!BaseImagePreprocessor`|Preprocessor|
 |`!BaseTextPreprocessor`|Preprocessor|
-|`!BaseSlidingPreprocessor`|Preprocessor|
 |`!VanillaSlidingPreprocessor`|Preprocessor|
 |`!WeightedSlidingPreprocessor`|Preprocessor|
 |`!SegmentPreprocessor`|Preprocessor|
@@ -110,7 +109,7 @@ In this example, we define a `BasePytorchEncoder` that loads a pretrained VGG16
 |`!BaseRouter`|Router|
 |`!BaseMapRouter`|Router|
 |`!BaseReduceRouter`|Router|
-|`!ChunkToDocumentRouter`|Router|
+|`!ChunkToDocRouter`|Router|
 |`!DocFillRouter`|Router|
 |`!ConcatEmbedRouter`|Router|
 |`!PublishRouter`|Router|
@@ -216,7 +215,7 @@ Note that how we defines a map under `kwargs` to describe the arguments, they wi
 The examples above are all about encoder. In fact, every component including encoder, preprocessor, router, indexer can all be described with YAML and loaded to GNES. For example,
 
 ```yaml
-!TextPreprocessor
+!PunctSplitPreprocessor
 parameters:
   start_doc_id: 0
   random_doc_id: True
diff --git a/yaml-example/component/preprocessor.yml b/yaml-example/component/preprocessor.yml
index f510a5e9..e9270bd6 100644
--- a/yaml-example/component/preprocessor.yml
+++ b/yaml-example/component/preprocessor.yml
@@ -1,4 +1,4 @@
-!TextPreprocessor
+!PunctSplitPreprocessor
 parameters:
   start_doc_id: 0
   random_doc_id: True