From b9fdb4159b784937eda00d445213e2fdfdd4d0b5 Mon Sep 17 00:00:00 2001 From: Jem Date: Thu, 22 Aug 2019 20:08:22 +0800 Subject: [PATCH] refactor(preprocessor): add gif chunk prep --- gnes/preprocessor/__init__.py | 3 ++- gnes/preprocessor/base.py | 20 ++++------------ gnes/preprocessor/image/resize.py | 2 +- gnes/preprocessor/image/segmentation.py | 10 ++++---- gnes/preprocessor/image/sliding_window.py | 7 +++--- gnes/preprocessor/video/ffmpeg.py | 16 +++++++------ tests/test_image_encoder.py | 6 ++++- tests/test_image_preprocessor.py | 4 ++-- tests/test_onnx_image_encoder.py | 6 ++++- tests/yaml/base-segmentation-image-prep.yml | 24 ++++++++++++------- tests/yaml/base-vanilla_sldwin-image-prep.yml | 22 +++++++++++++---- tests/yaml/pipe-gif.yml | 4 ++-- 12 files changed, 71 insertions(+), 53 deletions(-) diff --git a/gnes/preprocessor/__init__.py b/gnes/preprocessor/__init__.py index 689d251d..1a8e5538 100644 --- a/gnes/preprocessor/__init__.py +++ b/gnes/preprocessor/__init__.py @@ -34,7 +34,8 @@ 'ShotDetectPreprocessor': 'video.shotdetect', 'AudioVanilla': 'audio.audio_vanilla', 'BaseAudioPreprocessor': 'base', - 'RawChunkPreprocessor': 'base' + 'RawChunkPreprocessor': 'base', + 'GifChunkPreprocessor': 'video.ffmpeg' } register_all_class(_cls2file_map, 'preprocessor') diff --git a/gnes/preprocessor/base.py b/gnes/preprocessor/base.py index 875ab9b9..4d855585 100644 --- a/gnes/preprocessor/base.py +++ b/gnes/preprocessor/base.py @@ -19,8 +19,7 @@ import numpy as np from ..base import TrainableBase, CompositionalTrainableBase -from ..proto import gnes_pb2, array2blob, blob2array -from .helper import get_gif +from ..proto import gnes_pb2, array2blob class BasePreprocessor(TrainableBase): @@ -105,22 +104,13 @@ def raw_to_chunk(self, chunk: 'gnes_pb2.Chunk', raw_bytes: bytes): class RawChunkPreprocessor(BasePreprocessor): @staticmethod - def _parse_chunk(chunk: 'gnes_pb2.Chunk', doc_type, *args, **kwargs): - if doc_type == gnes_pb2.Document.TEXT: - raise NotImplementedError - elif doc_type == gnes_pb2.Document.IMAGE: - raise NotImplementedError - elif doc_type == gnes_pb2.Document.VIDEO: - return get_gif(blob2array(chunk.blob)) - elif doc_type == gnes_pb2.Document.AUDIO: - raise NotImplementedError - else: - raise ValueError("doc type can only be TEXT, IMAGE, VIDEO or AUDIO!") + def _parse_chunk(chunk: 'gnes_pb2.Chunk', *args, **kwargs): + raise NotImplementedError def apply(self, doc: 'gnes_pb2.Document') -> None: - if doc.raw_bytes: for chunk in doc.chunks: - chunk.raw = self._parse_chunk(chunk, doc.doc_type) + chunk.raw = self._parse_chunk(chunk) else: self.logger.error('bad document: "raw_bytes" is empty!') + diff --git a/gnes/preprocessor/image/resize.py b/gnes/preprocessor/image/resize.py index e23ed0a4..af8775a7 100644 --- a/gnes/preprocessor/image/resize.py +++ b/gnes/preprocessor/image/resize.py @@ -36,5 +36,5 @@ def apply(self, doc: 'gnes_pb2.Document') -> None: super().apply(doc) for c in doc.chunks: img = blob2array(c.blob) - img = np.array(Image.fromarray(img).resize((self.target_width, self.target_height))) + img = np.array(Image.fromarray(img.astype('uint8')).resize((self.target_width, self.target_height))) c.blob.CopyFrom(array2blob(img)) diff --git a/gnes/preprocessor/image/segmentation.py b/gnes/preprocessor/image/segmentation.py index cc87d9bc..98920cd4 100644 --- a/gnes/preprocessor/image/segmentation.py +++ b/gnes/preprocessor/image/segmentation.py @@ -67,24 +67,22 @@ def apply(self, doc: 'gnes_pb2.Document'): for ci, ele in enumerate(zip(chunks, weight)): c = doc.chunks.add() c.doc_id = doc.doc_id - c.blob.CopyFrom(array2blob(self._crop_resize(original_image, ele[0]))) + c.blob.CopyFrom(array2blob(self._crop(original_image, ele[0]))) c.offset_1d = ci c.offset_nd.x.extend(self._get_seg_offset_nd(all_subareas, index, ele[0])) c.weight = self._cal_area(ele[0]) / (original_image.size[0] * original_image.size[1]) c = doc.chunks.add() c.doc_id = doc.doc_id - c.blob.CopyFrom(array2blob(np.array(original_image.resize((self.target_width, - self.target_height))))) + c.blob.CopyFrom(array2blob(np.array(original_image))) c.offset_1d = len(chunks) c.offset_nd.x.extend([100, 100]) c.weight = 1. else: self.logger.error('bad document: "raw_bytes" is empty!') - def _crop_resize(self, original_image, coordinates): - return np.array(original_image.crop(coordinates).resize((self.target_width, - self.target_height))) + def _crop(self, original_image, coordinates): + return np.array(original_image.crop(coordinates)) def _get_seg_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], chunk: List[int]) -> List[int]: iou_list = [self._cal_iou(area, chunk) for area in all_subareas] diff --git a/gnes/preprocessor/image/sliding_window.py b/gnes/preprocessor/image/sliding_window.py index 64e973a9..d173dc8c 100644 --- a/gnes/preprocessor/image/sliding_window.py +++ b/gnes/preprocessor/image/sliding_window.py @@ -21,7 +21,6 @@ from .resize import SizedPreprocessor from ..helper import get_all_subarea, torch_transform -from ..video.ffmpeg import FFmpegPreprocessor from ...proto import gnes_pb2, array2blob @@ -88,8 +87,7 @@ def _get_all_sliding_window(self, img: 'np.ndarray'): for y in range(expanded_input.shape[1])] expanded_input = expanded_input.reshape((-1, self.window_size, self.window_size, 3)) - return [np.array(Image.fromarray(img).resize((self.target_width, self.target_height))) for img in - expanded_input], center_point_list + return [np.array(Image.fromarray(img)) for img in expanded_input], center_point_list def _get_slid_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], center_point: List[float]) -> \ List[int]: @@ -129,6 +127,7 @@ def _get_all_chunks_weight(self, image_set: List['np.ndarray']) -> List[float]: class WeightedSlidingPreprocessor(_SlidingPreprocessor): - def _get_all_chunks_weight(self, image_set: List['np.ndarray']) -> List[float]: + from ..video.ffmpeg import FFmpegPreprocessor + return FFmpegPreprocessor.pic_weight(image_set) diff --git a/gnes/preprocessor/video/ffmpeg.py b/gnes/preprocessor/video/ffmpeg.py index dd353ca6..bd7c7235 100644 --- a/gnes/preprocessor/video/ffmpeg.py +++ b/gnes/preprocessor/video/ffmpeg.py @@ -18,9 +18,9 @@ import numpy as np -from ..base import BaseVideoPreprocessor -from ..helper import get_video_frames, split_video_frames, phash_descriptor -from ...proto import gnes_pb2, array2blob +from ..base import BaseVideoPreprocessor, RawChunkPreprocessor +from ..helper import get_video_frames, split_video_frames, phash_descriptor, get_gif +from ...proto import gnes_pb2, array2blob, blob2array class FFmpegPreprocessor(BaseVideoPreprocessor): @@ -111,8 +111,6 @@ def __init__(self, max_frames_per_doc: int = -1, use_image_input: bool = False, splitter: str = '__split__', - audio_interval: int = 30, - sample_rate: int = 16000, *args, **kwargs): super().__init__(*args, **kwargs) @@ -120,8 +118,6 @@ def __init__(self, self.segment_interval = segment_interval self.segment_num = segment_num self.max_frames_per_doc = max_frames_per_doc - self.audio_interval = audio_interval - self.sample_rate = sample_rate self.splitter = splitter self.use_image_input = use_image_input self._ffmpeg_kwargs = kwargs @@ -180,3 +176,9 @@ def apply(self, doc: 'gnes_pb2.Document') -> None: self.logger.info('bad document: no key frames extracted') else: self.logger.error('bad document: "raw_bytes" is empty!') + + +class GifChunkPreprocessor(RawChunkPreprocessor, BaseVideoPreprocessor): + @staticmethod + def _parse_chunk(chunk: 'gnes_pb2.Chunk', *args, **kwargs): + return get_gif(blob2array(chunk.blob)) diff --git a/tests/test_image_encoder.py b/tests/test_image_encoder.py index d1013ac8..9c78e6c6 100644 --- a/tests/test_image_encoder.py +++ b/tests/test_image_encoder.py @@ -23,8 +23,12 @@ def img_process_for_test(dirname): pipline_prep1 = PipelinePreprocessor() pipline_prep1.components = lambda: [UnaryPreprocessor(doc_type=gnes_pb2.Document.IMAGE), ResizeChunkPreprocessor()] + pipline_prep2 = PipelinePreprocessor() + pipline_prep2.components = lambda: [VanillaSlidingPreprocessor(), + ResizeChunkPreprocessor()] + for preprocessor in [pipline_prep1, - VanillaSlidingPreprocessor()]: + pipline_prep2]: test_img_copy = copy.deepcopy(test_img) for img in test_img_copy: preprocessor.apply(img) diff --git a/tests/test_image_preprocessor.py b/tests/test_image_preprocessor.py index edb1bd3b..a9eace89 100644 --- a/tests/test_image_preprocessor.py +++ b/tests/test_image_preprocessor.py @@ -161,7 +161,7 @@ def test_slidingwindow_preprocessor_service_realdata(self): msg.request.index.CopyFrom(req.index) client.send_message(msg) r = client.recv_message() - self.assertEqual(r.envelope.routes[0].service, 'VanillaSlidingPreprocessor') + self.assertEqual(r.envelope.routes[0].service, 'PipelinePreprocessor') for d in r.request.index.docs: self.assertEqual(len(blob2array(d.chunks[0].blob).shape), 3) self.assertEqual(blob2array(d.chunks[0].blob).shape[-1], 3) @@ -186,7 +186,7 @@ def test_segmentation_preprocessor_service_realdata(self): msg.request.index.CopyFrom(req.index) client.send_message(msg) r = client.recv_message() - self.assertEqual(r.envelope.routes[0].service, 'SegmentPreprocessor') + self.assertEqual(r.envelope.routes[0].service, 'PipelinePreprocessor') for d in r.request.index.docs: self.assertEqual(len(blob2array(d.chunks[0].blob).shape), 3) self.assertEqual(blob2array(d.chunks[0].blob).shape[-1], 3) diff --git a/tests/test_onnx_image_encoder.py b/tests/test_onnx_image_encoder.py index f78d1eef..95d00c1b 100644 --- a/tests/test_onnx_image_encoder.py +++ b/tests/test_onnx_image_encoder.py @@ -23,8 +23,12 @@ def img_process_for_test(dirname): pipline_prep1 = PipelinePreprocessor() pipline_prep1.components = lambda: [UnaryPreprocessor(doc_type=gnes_pb2.Document.IMAGE), ResizeChunkPreprocessor()] + pipline_prep2 = PipelinePreprocessor() + pipline_prep2.components = lambda: [VanillaSlidingPreprocessor(), + ResizeChunkPreprocessor()] + for preprocessor in [pipline_prep1, - VanillaSlidingPreprocessor()]: + pipline_prep2]: test_img_copy = copy.deepcopy(test_img) for img in test_img_copy: preprocessor.apply(img) diff --git a/tests/yaml/base-segmentation-image-prep.yml b/tests/yaml/base-segmentation-image-prep.yml index 445cd0f1..26ebd78c 100644 --- a/tests/yaml/base-segmentation-image-prep.yml +++ b/tests/yaml/base-segmentation-image-prep.yml @@ -1,9 +1,17 @@ -!SegmentPreprocessor -parameters: - model_dir: ${FASTERRCNN_MODEL} - model_name: fasterrcnn_resnet50_fpn - target_img_size: 224 - _use_cuda: false +!PipelinePreprocessor +components: + - !SegmentPreprocessor + parameters: + model_dir: ${FASTERRCNN_MODEL} + model_name: fasterrcnn_resnet50_fpn + _use_cuda: false + gnes_config: + name: fasterRCNN + is_trained: true + - !ResizeChunkPreprocessor + parameters: + target_height: 224 + target_width: 224 gnes_config: - name: fasterRCNN - is_trained: true \ No newline at end of file + name: pipeline_preprocessor + work_dir: /save diff --git a/tests/yaml/base-vanilla_sldwin-image-prep.yml b/tests/yaml/base-vanilla_sldwin-image-prep.yml index 6130b53a..5082b312 100644 --- a/tests/yaml/base-vanilla_sldwin-image-prep.yml +++ b/tests/yaml/base-vanilla_sldwin-image-prep.yml @@ -1,5 +1,17 @@ -!VanillaSlidingPreprocessor -parameters: - window_size: 64 - stride_height: 64 - stride_wide: 64 +!PipelinePreprocessor +components: + - !VanillaSlidingPreprocessor + parameters: + window_size: 64 + stride_height: 64 + stride_wide: 64 + - !ResizeChunkPreprocessor + parameters: + target_width: 224 + target_height: 224 + gnes_config: + name: resize + is_trained: true +gnes_config: + name: PipelinePreprocessor + work_dir: /save diff --git a/tests/yaml/pipe-gif.yml b/tests/yaml/pipe-gif.yml index 4b1d292c..0de988d7 100644 --- a/tests/yaml/pipe-gif.yml +++ b/tests/yaml/pipe-gif.yml @@ -9,9 +9,9 @@ components: gnes_config: name: FFmpegVideoSegmentor is_trained: true - - !RawChunkPreprocessor + - !GifChunkPreprocessor gnes_config: - name: RawChunkPreprocessor + name: GifChunkPreprocessor is_trained: true gnes_config: name: PipelinePreprocessor