fix(ci): fix unit tests for modules

gnes-ai · Aug 21, 2019 · 944b8c0 · 944b8c0
1 parent 27dc34d
commit 944b8c0
Show file tree

Hide file tree

Showing 23 changed files with 32 additions and 34 deletions.
diff --git a/README.md b/README.md
@@ -278,7 +278,7 @@ Now let's see what the YAML config says. First impression, it is pretty intuitiv
  <summary>Preprocessor config: text-prep.yml (click to expand...)</summary>
 
 ```yaml
-!TextPreprocessor
+!PunctSplitPreprocessor
 parameters:
   start_doc_id: 0
   random_doc_id: True

diff --git a/gnes/preprocessor/__init__.py b/gnes/preprocessor/__init__.py
@@ -33,7 +33,7 @@
     'FFmpegVideoSegmentor': 'video.ffmpeg',
     'ShotDetectPreprocessor': 'video.shotdetect',
     'AudioVanilla': 'audio.audio_vanilla',
-    'BaseAudioPreprocessor': 'base'
+    'BaseAudioPreprocessor': 'base',
     'RawChunkPreprocessor': 'base'
 }
 

diff --git a/tests/contrib/fake_faiss.py b/tests/contrib/fake_faiss.py
@@ -8,4 +8,3 @@ def __init__(self, bar: int, *args, **kwargs):
         self.is_trained = True
         self.bar = bar
         self.logger.info('look at me, I override the original GNES faiss indexer')
-
diff --git a/tests/contrib/fake_faiss2.py b/tests/contrib/fake_faiss2.py
@@ -8,4 +8,3 @@ def __init__(self, bar: int, *args, **kwargs):
         self.is_trained = True
         self.bar = bar
         self.logger.info('look at me, I override the overrided faiss indexer!!!')
-
diff --git a/tests/test_annoyindexer.py b/tests/test_annoyindexer.py
@@ -1,10 +1,8 @@
 import os
-import shutil
 import unittest
 
 import numpy as np
 
-from gnes.helper import touch_dir
 from gnes.indexer.vector.annoy import AnnoyIndexer
 
 

diff --git a/tests/test_audio_preprocessor.py b/tests/test_audio_preprocessor.py
@@ -43,4 +43,4 @@ def test_video_preprocessor_service_realdata(self):
                     self.assertGreater(len(d.chunks), 0)
                     for _ in range(len(d.chunks)):
                         shape = blob2array(d.chunks[_].blob).shape
-                        self.assertEqual(len(shape), 1)
+                        self.assertEqual(len(shape), 1)
diff --git a/tests/test_bindexer.py b/tests/test_bindexer.py
@@ -22,7 +22,7 @@ def setUp(self):
                                    [3, 2, 1, 2]]).astype(np.uint8)
 
         self.toy_exp = [[(234, 0, 1., 1,), (123, 1, 1., 1)], [(432, 0, 1., 1), (1, 0, 1., 1)],
-                         [(234, 0, 1., 0.75), (123, 1, 1., 0.75)]]
+                        [(234, 0, 1., 0.75), (123, 1, 1., 0.75)]]
         self.weights = [1.] * len(self.toy_label)
 
         dirname = os.path.dirname(__file__)

diff --git a/tests/test_dict_indexer.py b/tests/test_dict_indexer.py
@@ -48,5 +48,3 @@ def test_query_docs(self):
         res = self.db.query(query_list)
         num_non_empty = sum(1 for d in res if d)
         self.assertEqual(num_non_empty, 1)
-
-
diff --git a/tests/test_gif.py b/tests/test_gif.py
@@ -1,10 +1,10 @@
+import copy
 import os
 import unittest
 
 from gnes.preprocessor.base import BasePreprocessor
 from gnes.preprocessor.video.ffmpeg import FFmpegVideoSegmentor
 from gnes.proto import gnes_pb2
-import copy
 
 
 class TestPartition(unittest.TestCase):

diff --git a/tests/test_gpt_encoder.py b/tests/test_gpt_encoder.py
@@ -39,4 +39,4 @@ def test_dump_load(self):
 
     def tearDown(self):
         if os.path.exists(self.dump_path):
-            os.remove(self.dump_path)
+            os.remove(self.dump_path)
diff --git a/tests/test_hash_encoder.py b/tests/test_hash_encoder.py
@@ -31,12 +31,12 @@ def test_train_pred(self):
 
         out = m.encode(self.test_data)
         self.assertEqual(self.x, out.shape[0])
-        self.assertEqual(self.num_idx+self.num_bytes, out.shape[1])
+        self.assertEqual(self.num_idx + self.num_bytes, out.shape[1])
         self.assertEqual(np.uint32, out.dtype)
 
     def test_yaml_load(self):
         pca_hash = PipelineEncoder.load_yaml(self.hash_yaml)
         pca_hash.train(self.test_data)
         out = pca_hash.encode(self.test_data)
         self.assertEqual(self.x, out.shape[0])
-        self.assertEqual(self.num_idx+self.num_bytes, out.shape[1])
+        self.assertEqual(self.num_idx + self.num_bytes, out.shape[1])
diff --git a/tests/test_hash_indexer.py b/tests/test_hash_indexer.py
@@ -1,8 +1,10 @@
 import os
 import unittest
+
 import numpy as np
+
 from gnes.indexer.vector.hbindexer import HBIndexer
-import shutil
+
 
 class TestMHIndexer(unittest.TestCase):
 
@@ -13,7 +15,7 @@ def setUp(self):
         self.n = 100
 
         self.test_label = [(_, 1) for _ in range(self.n)]
-        t = np.random.randint(0, 100, size=[self.n, self.n_idx+self.num_bytes])
+        t = np.random.randint(0, 100, size=[self.n, self.n_idx + self.num_bytes])
         self.test_data = t.astype(np.uint32)
         self.weights = [1.] * len(self.test_label)
         self.data_path = 'test_path'

diff --git a/tests/test_image_encoder.py b/tests/test_image_encoder.py
@@ -3,7 +3,7 @@
 import unittest
 import zipfile
 
-from gnes.encoder.image.base import BasePytorchEncoder
+from gnes.encoder.base import BaseEncoder
 from gnes.preprocessor.base import UnaryPreprocessor, PipelinePreprocessor
 from gnes.preprocessor.image.resize import ResizeChunkPreprocessor
 from gnes.preprocessor.image.sliding_window import VanillaSlidingPreprocessor
@@ -45,43 +45,43 @@ def setUp(self):
         self.mobilenet_yaml = os.path.join(dirname, 'yaml', 'mobilenet-encoder.yml')
 
     def test_vgg_encoding(self):
-        self.encoder = BasePytorchEncoder.load_yaml(self.vgg_yaml)
+        self.encoder = BaseEncoder.load_yaml(self.vgg_yaml)
         for test_img in self.test_img:
             vec = self.encoder.encode(test_img)
             print("the length of data now is:", len(test_img))
             self.assertEqual(vec.shape[0], len(test_img))
             self.assertEqual(vec.shape[1], 4096)
 
     def test_resnet_encoding(self):
-        self.encoder = BasePytorchEncoder.load_yaml(self.res_yaml)
+        self.encoder = BaseEncoder.load_yaml(self.res_yaml)
         for test_img in self.test_img:
             vec = self.encoder.encode(test_img)
             print("the length of data now is:", len(test_img))
             self.assertEqual(vec.shape[0], len(test_img))
             self.assertEqual(vec.shape[1], 2048)
 
     def test_inception_encoding(self):
-        self.encoder = BasePytorchEncoder.load_yaml(self.inception_yaml)
+        self.encoder = BaseEncoder.load_yaml(self.inception_yaml)
         for test_img in self.test_img:
             vec = self.encoder.encode(test_img)
             print("the length of data now is:", len(test_img))
             self.assertEqual(vec.shape[0], len(test_img))
             self.assertEqual(vec.shape[1], 2048)
 
     def test_mobilenet_encoding(self):
-        self.encoder = BasePytorchEncoder.load_yaml(self.mobilenet_yaml)
+        self.encoder = BaseEncoder.load_yaml(self.mobilenet_yaml)
         for test_img in self.test_img:
             vec = self.encoder.encode(test_img)
             print("the length of data now is:", len(test_img))
             self.assertEqual(vec.shape[0], len(test_img))
             self.assertEqual(vec.shape[1], 1280)
 
     def test_dump_load(self):
-        self.encoder = BasePytorchEncoder.load_yaml(self.vgg_yaml)
+        self.encoder = BaseEncoder.load_yaml(self.vgg_yaml)
 
         self.encoder.dump(self.dump_path)
 
-        vgg_encoder2 = BasePytorchEncoder.load(self.dump_path)
+        vgg_encoder2 = BaseEncoder.load(self.dump_path)
 
         for test_img in self.test_img:
             vec = vgg_encoder2.encode(test_img)

diff --git a/tests/test_mfcc_encoder.py b/tests/test_mfcc_encoder.py
@@ -31,4 +31,4 @@ def test_mfcc_encoding(self):
         vec = self.encoder.encode(self.audios)
         self.assertEqual(len(vec.shape), 2)
         self.assertEqual(vec.shape[0], len(self.audios))
-        self.assertEqual(vec.shape[1] % self.encoder.n_mfcc, 0)
+        self.assertEqual(vec.shape[1] % self.encoder.n_mfcc, 0)
diff --git a/tests/test_onnx_image_encoder.py b/tests/test_onnx_image_encoder.py
@@ -9,6 +9,7 @@
 from gnes.preprocessor.image.sliding_window import VanillaSlidingPreprocessor
 from gnes.proto import gnes_pb2, blob2array
 
+
 def img_process_for_test(dirname):
     zipfile_ = zipfile.ZipFile(os.path.join(dirname, 'imgs/test.zip'))
     all_bytes = [zipfile_.open(v).read() for v in zipfile_.namelist()]
@@ -31,6 +32,7 @@ def img_process_for_test(dirname):
                                           for img in test_img_copy for chunk in img.chunks])
     return test_img_all_preprocessor
 
+
 class TestONNXImageEncoder(unittest.TestCase):
 
     def setUp(self):

diff --git a/tests/test_pytorch_transformers_encoder.py b/tests/test_pytorch_transformers_encoder.py
@@ -3,6 +3,7 @@
 
 from gnes.encoder.text.torch_transformers import TorchTransformersEncoder
 
+
 class TestTorchTransformersEncoder(unittest.TestCase):
 
     def setUp(self):

diff --git a/tests/test_router.py b/tests/test_router.py
@@ -17,7 +17,7 @@ def setUp(self):
         self.publish_router_yaml = '!PublishRouter {parameters: {num_part: 2}}'
         self.batch_router_yaml = '!DocBatchRouter {gnes_config: {batch_size: 2}}'
         self.reduce_router_yaml = 'BaseReduceRouter'
-        self.chunk_router_yaml = 'ChunkToDocumentRouter'
+        self.chunk_router_yaml = 'ChunkToDocRouter'
         self.chunk_sum_yaml = 'ChunkSumRouter'
         self.doc_router_yaml = 'DocFillRouter'
         self.doc_sum_yaml = 'DocSumRouter'

diff --git a/tests/test_video_preprocessor.py b/tests/test_video_preprocessor.py
@@ -65,7 +65,7 @@ def test_video_cut_by_frame(self):
                 r = client.recv_message()
                 for d in r.request.index.docs:
                     self.assertGreater(len(d.chunks), 0)
-                    for _ in range(len(d.chunks)-1):
+                    for _ in range(len(d.chunks) - 1):
                         shape = blob2array(d.chunks[_].blob).shape
                         self.assertEqual(shape, (30, 168, 192, 3))
                     shape = blob2array(d.chunks[-1].blob).shape

diff --git a/tests/yaml/preprocessor1.yml b/tests/yaml/preprocessor1.yml
@@ -1,4 +1,4 @@
-!TextPreprocessor
+!PunctSplitPreprocessor
 parameters:
   start_doc_id: 0
   random_doc_id: True

diff --git a/tests/yaml/router-chunk-reduce.yml b/tests/yaml/router-chunk-reduce.yml
@@ -1 +1 @@
-!ChunkToDocumentRouter {}
+!ChunkToDocRouter {}
diff --git a/tests/yaml/test-preprocessor.yml b/tests/yaml/test-preprocessor.yml
@@ -1,4 +1,4 @@
-!TextPreprocessor
+!PunctSplitPreprocessor
 parameters:
   start_doc_id: 0
   random_doc_id: True

diff --git a/tutorials/component-yaml-spec.md b/tutorials/component-yaml-spec.md
@@ -65,10 +65,9 @@ In this example, we define a `BasePytorchEncoder` that loads a pretrained VGG16
 |`!CLS`| Component Type |
 |---|---|
 |`!BasePreprocessor`|Preprocessor|
-|`!TextPreprocessor`|Preprocessor|
+|`!PunctSplitPreprocessor`|Preprocessor|
 |`!BaseImagePreprocessor`|Preprocessor|
 |`!BaseTextPreprocessor`|Preprocessor|
-|`!BaseSlidingPreprocessor`|Preprocessor|
 |`!VanillaSlidingPreprocessor`|Preprocessor|
 |`!WeightedSlidingPreprocessor`|Preprocessor|
 |`!SegmentPreprocessor`|Preprocessor|
@@ -110,7 +109,7 @@ In this example, we define a `BasePytorchEncoder` that loads a pretrained VGG16
 |`!BaseRouter`|Router|
 |`!BaseMapRouter`|Router|
 |`!BaseReduceRouter`|Router|
-|`!ChunkToDocumentRouter`|Router|
+|`!ChunkToDocRouter`|Router|
 |`!DocFillRouter`|Router|
 |`!ConcatEmbedRouter`|Router|
 |`!PublishRouter`|Router|
@@ -216,7 +215,7 @@ Note that how we defines a map under `kwargs` to describe the arguments, they wi
 The examples above are all about encoder. In fact, every component including encoder, preprocessor, router, indexer can all be described with YAML and loaded to GNES. For example,
 
 ```yaml
-!TextPreprocessor
+!PunctSplitPreprocessor
 parameters:
   start_doc_id: 0
   random_doc_id: True

diff --git a/yaml-example/component/preprocessor.yml b/yaml-example/component/preprocessor.yml
@@ -1,4 +1,4 @@
-!TextPreprocessor
+!PunctSplitPreprocessor
 parameters:
   start_doc_id: 0
   random_doc_id: True
Original file line number	Diff line number	Diff line change
Expand Up		@@ -8,4 +8,3 @@ def __init__(self, bar: int, *args, **kwargs):
		self.is_trained = True
		self.bar = bar
		self.logger.info('look at me, I override the original GNES faiss indexer')
Original file line number	Diff line number	Diff line change
Expand Up		@@ -48,5 +48,3 @@ def test_query_docs(self):
		res = self.db.query(query_list)
		num_non_empty = sum(1 for d in res if d)
		self.assertEqual(num_non_empty, 1)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -3,6 +3,7 @@

		from gnes.encoder.text.torch_transformers import TorchTransformersEncoder


		class TestTorchTransformersEncoder(unittest.TestCase):

		    def setUp(self):
Expand Down