refactor(preprocessor): add gif chunk prep

gnes-ai · Aug 22, 2019
commit b9fdb41 (1 parent: 1a284b8)

Showing 12 changed files with 71 additions and 53 deletions.
diff --git a/gnes/preprocessor/__init__.py b/gnes/preprocessor/__init__.py
@@ -34,7 +34,8 @@
     'ShotDetectPreprocessor': 'video.shotdetect',
     'AudioVanilla': 'audio.audio_vanilla',
     'BaseAudioPreprocessor': 'base',
-    'RawChunkPreprocessor': 'base'
+    'RawChunkPreprocessor': 'base',
+    'GifChunkPreprocessor': 'video.ffmpeg'
 }
 
 register_all_class(_cls2file_map, 'preprocessor')
diff --git a/gnes/preprocessor/base.py b/gnes/preprocessor/base.py
@@ -19,8 +19,7 @@
 import numpy as np
 
 from ..base import TrainableBase, CompositionalTrainableBase
-from ..proto import gnes_pb2, array2blob, blob2array
-from .helper import get_gif
+from ..proto import gnes_pb2, array2blob
 
 
 class BasePreprocessor(TrainableBase):
@@ -105,22 +104,13 @@ def raw_to_chunk(self, chunk: 'gnes_pb2.Chunk', raw_bytes: bytes):
 class RawChunkPreprocessor(BasePreprocessor):
 
     @staticmethod
-    def _parse_chunk(chunk: 'gnes_pb2.Chunk', doc_type, *args, **kwargs):
-        if doc_type == gnes_pb2.Document.TEXT:
-            raise NotImplementedError
-        elif doc_type == gnes_pb2.Document.IMAGE:
-            raise NotImplementedError
-        elif doc_type == gnes_pb2.Document.VIDEO:
-            return get_gif(blob2array(chunk.blob))
-        elif doc_type == gnes_pb2.Document.AUDIO:
-            raise NotImplementedError
-        else:
-            raise ValueError("doc type can only be TEXT, IMAGE, VIDEO or AUDIO!")
+    def _parse_chunk(chunk: 'gnes_pb2.Chunk', *args, **kwargs):
+        raise NotImplementedError
 
     def apply(self, doc: 'gnes_pb2.Document') -> None:
-
         if doc.raw_bytes:
             for chunk in doc.chunks:
-                chunk.raw = self._parse_chunk(chunk, doc.doc_type)
+                chunk.raw = self._parse_chunk(chunk)
         else:
             self.logger.error('bad document: "raw_bytes" is empty!')
+
diff --git a/gnes/preprocessor/image/resize.py b/gnes/preprocessor/image/resize.py
@@ -36,5 +36,5 @@ def apply(self, doc: 'gnes_pb2.Document') -> None:
         super().apply(doc)
         for c in doc.chunks:
             img = blob2array(c.blob)
-            img = np.array(Image.fromarray(img).resize((self.target_width, self.target_height)))
+            img = np.array(Image.fromarray(img.astype('uint8')).resize((self.target_width, self.target_height)))
             c.blob.CopyFrom(array2blob(img))
diff --git a/gnes/preprocessor/image/segmentation.py b/gnes/preprocessor/image/segmentation.py
@@ -67,24 +67,22 @@ def apply(self, doc: 'gnes_pb2.Document'):
             for ci, ele in enumerate(zip(chunks, weight)):
                 c = doc.chunks.add()
                 c.doc_id = doc.doc_id
-                c.blob.CopyFrom(array2blob(self._crop_resize(original_image, ele[0])))
+                c.blob.CopyFrom(array2blob(self._crop(original_image, ele[0])))
                 c.offset_1d = ci
                 c.offset_nd.x.extend(self._get_seg_offset_nd(all_subareas, index, ele[0]))
                 c.weight = self._cal_area(ele[0]) / (original_image.size[0] * original_image.size[1])
 
             c = doc.chunks.add()
             c.doc_id = doc.doc_id
-            c.blob.CopyFrom(array2blob(np.array(original_image.resize((self.target_width,
-                                                                       self.target_height)))))
+            c.blob.CopyFrom(array2blob(np.array(original_image)))
             c.offset_1d = len(chunks)
             c.offset_nd.x.extend([100, 100])
             c.weight = 1.
         else:
             self.logger.error('bad document: "raw_bytes" is empty!')
 
-    def _crop_resize(self, original_image, coordinates):
-        return np.array(original_image.crop(coordinates).resize((self.target_width,
-                                                                 self.target_height)))
+    def _crop(self, original_image, coordinates):
+        return np.array(original_image.crop(coordinates))
 
     def _get_seg_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], chunk: List[int]) -> List[int]:
         iou_list = [self._cal_iou(area, chunk) for area in all_subareas]

diff --git a/gnes/preprocessor/image/sliding_window.py b/gnes/preprocessor/image/sliding_window.py
@@ -21,7 +21,6 @@
 
 from .resize import SizedPreprocessor
 from ..helper import get_all_subarea, torch_transform
-from ..video.ffmpeg import FFmpegPreprocessor
 from ...proto import gnes_pb2, array2blob
 
 
@@ -88,8 +87,7 @@ def _get_all_sliding_window(self, img: 'np.ndarray'):
             for y in range(expanded_input.shape[1])]
 
         expanded_input = expanded_input.reshape((-1, self.window_size, self.window_size, 3))
-        return [np.array(Image.fromarray(img).resize((self.target_width, self.target_height))) for img in
-                expanded_input], center_point_list
+        return [np.array(Image.fromarray(img)) for img in expanded_input], center_point_list
 
     def _get_slid_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], center_point: List[float]) -> \
             List[int]:
@@ -129,6 +127,7 @@ def _get_all_chunks_weight(self, image_set: List['np.ndarray']) -> List[float]:
 
 
 class WeightedSlidingPreprocessor(_SlidingPreprocessor):
-
     def _get_all_chunks_weight(self, image_set: List['np.ndarray']) -> List[float]:
+        from ..video.ffmpeg import FFmpegPreprocessor
+
         return FFmpegPreprocessor.pic_weight(image_set)
diff --git a/gnes/preprocessor/video/ffmpeg.py b/gnes/preprocessor/video/ffmpeg.py
@@ -18,9 +18,9 @@
 
 import numpy as np
 
-from ..base import BaseVideoPreprocessor
-from ..helper import get_video_frames, split_video_frames, phash_descriptor
-from ...proto import gnes_pb2, array2blob
+from ..base import BaseVideoPreprocessor, RawChunkPreprocessor
+from ..helper import get_video_frames, split_video_frames, phash_descriptor, get_gif
+from ...proto import gnes_pb2, array2blob, blob2array
 
 
 class FFmpegPreprocessor(BaseVideoPreprocessor):
@@ -111,17 +111,13 @@ def __init__(self,
                  max_frames_per_doc: int = -1,
                  use_image_input: bool = False,
                  splitter: str = '__split__',
-                 audio_interval: int = 30,
-                 sample_rate: int = 16000,
                  *args,
                  **kwargs):
         super().__init__(*args, **kwargs)
         self.segment_method = segment_method
         self.segment_interval = segment_interval
         self.segment_num = segment_num
         self.max_frames_per_doc = max_frames_per_doc
-        self.audio_interval = audio_interval
-        self.sample_rate = sample_rate
         self.splitter = splitter
         self.use_image_input = use_image_input
         self._ffmpeg_kwargs = kwargs
@@ -180,3 +176,9 @@ def apply(self, doc: 'gnes_pb2.Document') -> None:
                 self.logger.info('bad document: no key frames extracted')
         else:
             self.logger.error('bad document: "raw_bytes" is empty!')
+
+
+class GifChunkPreprocessor(RawChunkPreprocessor, BaseVideoPreprocessor):
+    @staticmethod
+    def _parse_chunk(chunk: 'gnes_pb2.Chunk', *args, **kwargs):
+        return get_gif(blob2array(chunk.blob))
diff --git a/tests/test_image_encoder.py b/tests/test_image_encoder.py
@@ -23,8 +23,12 @@ def img_process_for_test(dirname):
     pipline_prep1 = PipelinePreprocessor()
     pipline_prep1.components = lambda: [UnaryPreprocessor(doc_type=gnes_pb2.Document.IMAGE),
                                         ResizeChunkPreprocessor()]
+    pipline_prep2 = PipelinePreprocessor()
+    pipline_prep2.components = lambda: [VanillaSlidingPreprocessor(),
+                                        ResizeChunkPreprocessor()]
+
     for preprocessor in [pipline_prep1,
-                         VanillaSlidingPreprocessor()]:
+                         pipline_prep2]:
         test_img_copy = copy.deepcopy(test_img)
         for img in test_img_copy:
             preprocessor.apply(img)

diff --git a/tests/test_image_preprocessor.py b/tests/test_image_preprocessor.py
@@ -161,7 +161,7 @@ def test_slidingwindow_preprocessor_service_realdata(self):
                 msg.request.index.CopyFrom(req.index)
                 client.send_message(msg)
                 r = client.recv_message()
-                self.assertEqual(r.envelope.routes[0].service, 'VanillaSlidingPreprocessor')
+                self.assertEqual(r.envelope.routes[0].service, 'PipelinePreprocessor')
                 for d in r.request.index.docs:
                     self.assertEqual(len(blob2array(d.chunks[0].blob).shape), 3)
                     self.assertEqual(blob2array(d.chunks[0].blob).shape[-1], 3)
@@ -186,7 +186,7 @@ def test_segmentation_preprocessor_service_realdata(self):
                 msg.request.index.CopyFrom(req.index)
                 client.send_message(msg)
                 r = client.recv_message()
-                self.assertEqual(r.envelope.routes[0].service, 'SegmentPreprocessor')
+                self.assertEqual(r.envelope.routes[0].service, 'PipelinePreprocessor')
                 for d in r.request.index.docs:
                     self.assertEqual(len(blob2array(d.chunks[0].blob).shape), 3)
                     self.assertEqual(blob2array(d.chunks[0].blob).shape[-1], 3)

diff --git a/tests/test_onnx_image_encoder.py b/tests/test_onnx_image_encoder.py
@@ -23,8 +23,12 @@ def img_process_for_test(dirname):
     pipline_prep1 = PipelinePreprocessor()
     pipline_prep1.components = lambda: [UnaryPreprocessor(doc_type=gnes_pb2.Document.IMAGE),
                                         ResizeChunkPreprocessor()]
+    pipline_prep2 = PipelinePreprocessor()
+    pipline_prep2.components = lambda: [VanillaSlidingPreprocessor(),
+                                        ResizeChunkPreprocessor()]
+
     for preprocessor in [pipline_prep1,
-                         VanillaSlidingPreprocessor()]:
+                         pipline_prep2]:
         test_img_copy = copy.deepcopy(test_img)
         for img in test_img_copy:
             preprocessor.apply(img)

diff --git a/tests/yaml/base-segmentation-image-prep.yml b/tests/yaml/base-segmentation-image-prep.yml
@@ -1,9 +1,17 @@
-!SegmentPreprocessor
-parameters:
-  model_dir: ${FASTERRCNN_MODEL}
-  model_name: fasterrcnn_resnet50_fpn
-  target_img_size: 224
-  _use_cuda: false
+!PipelinePreprocessor
+components:
+  - !SegmentPreprocessor
+    parameters:
+      model_dir: ${FASTERRCNN_MODEL}
+      model_name: fasterrcnn_resnet50_fpn
+      _use_cuda: false
+    gnes_config:
+      name: fasterRCNN
+      is_trained: true
+  - !ResizeChunkPreprocessor
+    parameters:
+      target_height: 224
+      target_width: 224
 gnes_config:
-  name: fasterRCNN
-  is_trained: true
+  name: pipeline_preprocessor
+  work_dir: /save
diff --git a/tests/yaml/base-vanilla_sldwin-image-prep.yml b/tests/yaml/base-vanilla_sldwin-image-prep.yml
@@ -1,5 +1,17 @@
-!VanillaSlidingPreprocessor
-parameters:
-  window_size: 64
-  stride_height: 64
-  stride_wide: 64
+!PipelinePreprocessor
+components:
+  - !VanillaSlidingPreprocessor
+    parameters:
+      window_size: 64
+      stride_height: 64
+      stride_wide: 64
+  - !ResizeChunkPreprocessor
+    parameters:
+      target_width: 224
+      target_height: 224
+    gnes_config:
+      name: resize
+      is_trained: true
+gnes_config:
+  name: PipelinePreprocessor
+  work_dir: /save
diff --git a/tests/yaml/pipe-gif.yml b/tests/yaml/pipe-gif.yml
@@ -9,9 +9,9 @@ components:
     gnes_config:
       name: FFmpegVideoSegmentor
       is_trained: true
-  - !RawChunkPreprocessor
+  - !GifChunkPreprocessor
     gnes_config:
-      name: RawChunkPreprocessor
+      name: GifChunkPreprocessor
       is_trained: true
 gnes_config:
   name: PipelinePreprocessor