feat(image_preprocessor): add fasterRCNN

gnes-ai · Jul 17, 2019 · 5867f87 · 5867f87
1 parent ff86fd6
commit 5867f87
Show file tree

Hide file tree

Showing 14 changed files with 182 additions and 37 deletions.
diff --git a/docker-compose/component/image_preprocessor.yml b/docker-compose/component/image_preprocessor.yml
diff --git a/docker-compose/component/img_preprocessor_fasterRCNN.yml b/docker-compose/component/img_preprocessor_fasterRCNN.yml
@@ -0,0 +1,8 @@
+!SegmentPreprocessor
+parameter:
+  model_dir: /ext_data/image_preprocessor
+  model_name: fasterrcnn_resnet50_fpn
+  target_img_size: 224
+  _use_cuda: false
+property:
+  is_trained: true
diff --git a/docker-compose/component/img_preprocessor_singleton.yml b/docker-compose/component/img_preprocessor_singleton.yml
@@ -0,0 +1,3 @@
+!BaseSingletonPreprocessor
+parameter:
+  doc_type: 2
diff --git a/docker-compose/component/img_preprocessor_vanilla_sldwin.yml b/docker-compose/component/img_preprocessor_vanilla_sldwin.yml
@@ -0,0 +1,6 @@
+!VanillaSlidingPreprocessor
+parameter:
+  window_size: 16
+  stride_height: 16
+  stride_wide: 16
+  target_img_size: 224
diff --git a/docker-compose/component/img_preprocessor_weight_sldwin.yml b/docker-compose/component/img_preprocessor_weight_sldwin.yml
@@ -0,0 +1,6 @@
+!WeightedSlidingPreprocessor
+parameter:
+  window_size: 16
+  stride_height: 16
+  stride_wide: 16
+  target_img_size: 224
diff --git a/gnes/preprocessor/__init__.py b/gnes/preprocessor/__init__.py
@@ -25,7 +25,7 @@
     'BaseTextPreprocessor': 'text.base',
     'VanillaSlidingPreprocessor': 'image.simple',
     'WeightedSlidingPreprocessor': 'image.simple',
-    'SegmentPreprocessor': 'image.simple',
+    'SegmentPreprocessor': 'image.segmentation',
     'BaseSingletonPreprocessor': 'base',
     'BaseVideoPreprocessor': 'video.base',
     'FFmpegPreprocessor': 'video.ffmpeg',

diff --git a/gnes/preprocessor/image/base.py b/gnes/preprocessor/image/base.py
@@ -29,4 +29,10 @@ def __init__(self,
         self.is_rgb = is_rgb
 
     def _get_all_chunks_weight(self, image_set: List['np.ndarray']) -> List[float]:
-        pass
+        pass
+
+    @classmethod
+    def _torch_transform(cls, image):
+        """Turn ``image`` into a tensor and normalize it.
+
+        Applies torchvision's ``ToTensor`` followed by ``Normalize`` with a
+        mean and std of 0.5 for each of three channels.
+
+        :param image: the image handed to ``ToTensor``
+        :return: the normalized tensor
+        """
+        from torchvision import transforms as tv_transforms
+
+        to_tensor = tv_transforms.ToTensor()
+        normalize = tv_transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
+        return normalize(to_tensor(image))
diff --git a/gnes/preprocessor/image/segmentation.py b/gnes/preprocessor/image/segmentation.py
@@ -0,0 +1,55 @@
+from .base import BaseImagePreprocessor
+from ...proto import gnes_pb2, array2blob
+from PIL import Image
+import numpy as np
+import io
+import os
+
+
+class SegmentPreprocessor(BaseImagePreprocessor):
+    """Split an image document into chunks using a torchvision detection model.
+
+    Every box predicted by the model is cropped out of the image, resized to
+    ``target_img_size`` x ``target_img_size`` and stored as one chunk whose
+    weight is the score the model predicted for that box.
+    """
+
+    def __init__(self, model_name: str,
+                 model_dir: str,
+                 target_img_size: int = 224,
+                 _use_cuda: bool = False,
+                 *args, **kwargs):
+        """
+        :param model_name: attribute name looked up on ``torchvision.models.detection``
+        :param model_dir: directory exported as ``TORCH_HOME`` before the model is built
+        :param target_img_size: side length every cropped chunk is resized to
+        :param _use_cuda: run the model on ``cuda:0`` when CUDA is available
+        """
+        super().__init__(*args, **kwargs)
+        self.model_name = model_name
+        self.model_dir = model_dir
+        self.target_img_size = target_img_size
+        self._use_cuda = _use_cuda
+
+    def post_init(self):
+        """Build the detection model, switch it to eval mode and place it on the device."""
+        import torch
+        import torchvision.models as models
+
+        os.environ['TORCH_HOME'] = self.model_dir
+        # Always define the device (not only on the CUDA branch) so that
+        # ``apply`` can move its input tensor next to the model.
+        self._device = torch.device("cuda:0" if self._use_cuda and torch.cuda.is_available() else "cpu")
+        self._model = getattr(models.detection, self.model_name)(pretrained=True)
+        self._model = self._model.eval().to(self._device)
+
+    def apply(self, doc: 'gnes_pb2.Document'):
+        """Add one chunk per detected box to ``doc``.
+
+        Logs an error and adds nothing when ``doc.raw_bytes`` is empty.
+        """
+        import torch
+
+        super().apply(doc)
+        if not doc.raw_bytes:
+            self.logger.error('bad document: "raw_bytes" is empty!')
+            return
+
+        # Force three channels: the shared transform normalizes with 3-element
+        # mean/std, so grayscale, palette or RGBA inputs would otherwise fail.
+        original_image = Image.open(io.BytesIO(doc.raw_bytes)).convert('RGB')
+        # NOTE(review): torchvision documents detection-model inputs as 0-1 range
+        # tensors, while ``_torch_transform`` additionally applies
+        # Normalize(0.5, 0.5) - confirm this extra normalization is intended.
+        image_tensor = self._torch_transform(original_image).to(self._device)
+
+        # Inference only: skip building the autograd graph.
+        with torch.no_grad():
+            seg_output = self._model([image_tensor])
+
+        boxes = seg_output[0]['boxes'].tolist()
+        scores = seg_output[0]['scores'].tolist()
+        for ci, (box, score) in enumerate(zip(boxes, scores)):
+            c = doc.chunks.add()
+            c.doc_id = doc.doc_id
+            c.blob.CopyFrom(array2blob(self._crop_image_reshape(original_image, box)))
+            c.offset_1d = ci
+            c.weight = score
+
+    def _crop_image_reshape(self, original_image, coordinates):
+        """Crop ``coordinates`` out of ``original_image`` and resize the crop.
+
+        :param original_image: PIL image to crop from
+        :param coordinates: box passed unchanged to ``Image.crop``
+        :return: the resized crop as a numpy array
+        """
+        target_size = (self.target_img_size, self.target_img_size)
+        return np.array(original_image.crop(coordinates).resize(target_size))
diff --git a/gnes/preprocessor/image/simple.py → gnes/preprocessor/image/slidingWindow.py b/gnes/preprocessor/image/simple.py → gnes/preprocessor/image/slidingWindow.py
@@ -19,6 +19,7 @@
 from PIL import Image
 
 from .base import BaseImagePreprocessor
+from ..video.ffmpeg import FFmpegPreprocessor
 from ...proto import gnes_pb2, array2blob
 
 
@@ -27,19 +28,24 @@ class BaseSlidingPreprocessor(BaseImagePreprocessor):
     def __init__(self, window_size: int = 64,
                  stride_height: int = 64,
                  stride_wide: int = 64,
+                 target_img_size: int = 224,
                  *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.window_size = window_size
         self.stride_height = stride_height
         self.stride_wide = stride_wide
+        self.target_img_size = target_img_size
 
     def apply(self, doc: 'gnes_pb2.Document'):
         super().apply(doc)
         if doc.raw_bytes:
             img = np.array(Image.open(io.BytesIO(doc.raw_bytes)))
             image_set = self._get_all_sliding_window(img)
-            weight = self._get_all_chunks_weight(image_set)
-            for ci, ele in enumerate(zip(image_set, weight)):
+            normalizaed_image_set = [np.array(self._torch_transform(img)).transpose(1, 2, 0)
+                                     for img in image_set]
+            weight = self._get_all_chunks_weight(normalizaed_image_set)
+
+            for ci, ele in enumerate(zip(normalizaed_image_set, weight)):
                 c = doc.chunks.add()
                 c.doc_id = doc.doc_id
                 c.blob.CopyFrom(array2blob(ele[0]))
@@ -75,36 +81,17 @@ def _get_all_sliding_window(self, img: 'np.ndarray') -> List['np.ndarray']:
             writeable=False
         )
         expanded_input = expanded_input.reshape((-1, self.window_size, self.window_size, 3))
-        return [np.array(Image.fromarray(img).resize((self.target_img_size, self.target_img_size))) for img in
-                expanded_input]
+        return [np.array(Image.fromarray(img).resize((self.target_img_size, self.target_img_size))) for img in expanded_input]
 
 
 class VanillaSlidingPreprocessor(BaseSlidingPreprocessor):
 
-    def _get_all_chunks_weight(self, image_set) -> List[float]:
+    def _get_all_chunks_weight(self, image_set: List['np.ndarray']) -> List[float]:
         return [1 / len(image_set) for _ in range(len(image_set))]
 
 
 class WeightedSlidingPreprocessor(BaseSlidingPreprocessor):
 
-    def _get_all_chunks_weight(self, image_set) -> List[float]:
-        weight = np.zeros([len(image_set)])
-        # n_channel is usually 3 for RGB images
-        n_channel = image_set[0].shape[-1]
-        for i in range(len(image_set)):
-            # calcualte the variance of histgram of pixels
-            weight[i] = sum([np.histogram(image_set[i][:, :, _])[0].var()
-                             for _ in range(n_channel)])
-        weight = weight / weight.sum()
-
-        # normalized result
-        weight = np.exp(- weight * 10)
-        return weight / weight.sum()
-
-
-class SegmentPreprocessor(BaseImagePreprocessor):
-    def apply(self, doc: 'gnes_pb2.Document'):
-        raise NotImplementedError
-
     def _get_all_chunks_weight(self, image_set: List['np.ndarray']) -> List[float]:
-        raise NotImplementedError
+        return FFmpegPreprocessor.pic_weight(image_set)
+
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -48,6 +48,8 @@ def line2pb_doc(line: str, doc_id: int = 0, deliminator: str = r'[.。！？!?]+
         'VGG_MODEL': '/',
         'RESNET_MODEL': '/',
         'INCEPTION_MODEL': '/',
+        'MOBILENET_MODEL': '/',
+        'FASTERRCNN_MODEL': '/',
         'GNES_PROFILING': 0
     },
     'idc-165': {
@@ -63,6 +65,8 @@ def line2pb_doc(line: str, doc_id: int = 0, deliminator: str = r'[.。！？!?]+
         'VGG_MODEL': '/ext_data/image_encoder',
         'RESNET_MODEL': '/ext_data/image_encoder',
         'INCEPTION_MODEL': '/ext_data/image_encoder',
+        'MOBILENET_MODEL': '/ext_data/image_encoder',
+        'FASTERRCNN_MODEL': '/ext_data/image_preprocessor',
         'GNES_PROFILING': 0
     }
 

diff --git a/tests/test_image_encoder.py b/tests/test_image_encoder.py
@@ -5,7 +5,7 @@
 import numpy as np
 
 from gnes.encoder.image.base import BasePytorchEncoder
-from gnes.preprocessor.image.simple import VanillaSlidingPreprocessor
+from gnes.preprocessor.image.slidingWindow import VanillaSlidingPreprocessor
 from gnes.preprocessor.base import BaseSingletonPreprocessor
 from gnes.proto import gnes_pb2, blob2array
 
@@ -39,6 +39,7 @@ def setUp(self):
         self.vgg_yaml = os.path.join(dirname, 'yaml', 'vgg-encoder.yml')
         self.res_yaml = os.path.join(dirname, 'yaml', 'resnet-encoder.yml')
         self.inception_yaml = os.path.join(dirname, 'yaml', 'inception-encoder.yml')
+        self.mobilenet_yaml = os.path.join(dirname, 'yaml', 'mobilenet-encoder.yml')
 
     def test_vgg_encoding(self):
         self.encoder = BasePytorchEncoder.load_yaml(self.vgg_yaml)
@@ -64,6 +65,14 @@ def test_inception_encoding(self):
             self.assertEqual(vec.shape[0], len(test_img))
             self.assertEqual(vec.shape[1], 2048)
 
+    def test_mobilenet_encoding(self):
+        """Encode each test batch with the MobileNet config and check the output shape.
+
+        The encoder must return one row per input item and 1280 columns.
+        """
+        self.encoder = BasePytorchEncoder.load_yaml(self.mobilenet_yaml)
+        for batch in self.test_img:
+            encoded = self.encoder.encode(batch)
+            batch_size = len(batch)
+            print("the length of data now is:", batch_size)
+            self.assertEqual(encoded.shape[0], batch_size)
+            self.assertEqual(encoded.shape[1], 1280)
+
     def test_dump_load(self):
         self.encoder = BasePytorchEncoder.load_yaml(self.vgg_yaml)
 

diff --git a/tests/test_image_preprocessor.py b/tests/test_image_preprocessor.py
@@ -14,6 +14,7 @@ def setUp(self):
         self.dirname = os.path.dirname(__file__)
         self.singleton_img_pre_yaml = os.path.join(self.dirname, 'yaml', 'base-singleton-image-prep.yml')
         self.slidingwindow_img_pre_yaml = os.path.join(self.dirname, 'yaml', 'base-vanilla_sldwin-image-prep.yml')
+        self.segmentation_img_pre_yaml = os.path.join(self.dirname, 'yaml', 'base-segmentation-image-prep.yml')
 
     def test_singleton_preprocessor_service_empty(self):
         args = set_preprocessor_service_parser().parse_args([
@@ -29,6 +30,13 @@ def test_slidingwindow_preprocessor_service_empty(self):
         with PreprocessorService(args):
             pass
 
+    def test_segmentation_preprocessor_service_empty(self):
+        """Start the service from the segmentation YAML and leave it without sending anything."""
+        cli_args = ['--yaml_path', self.segmentation_img_pre_yaml]
+        service_args = set_preprocessor_service_parser().parse_args(cli_args)
+        with PreprocessorService(service_args):
+            pass
+
     def test_singleton_preprocessor_service_echo(self):
         args = set_preprocessor_service_parser().parse_args([
             '--yaml_path', self.singleton_img_pre_yaml
@@ -67,6 +75,25 @@ def test_slidingwindow_preprocessor_service_echo(self):
             r = client.recv_message()
             # print(r)
 
+    def test_segmentation_preprocessor_service_echo(self):
+        """Round-trip messages holding empty documents through the segmentation service.
+
+        Five empty documents are added to the index request and the message is
+        sent; five more are then added to the train request of the same message
+        and it is sent again. Each send waits for one reply.
+        """
+        service_args = set_preprocessor_service_parser().parse_args(
+            ['--yaml_path', self.segmentation_img_pre_yaml])
+        # The client's ports are the service's ports swapped.
+        client_args = _set_client_parser().parse_args(
+            ['--port_in', str(service_args.port_out),
+             '--port_out', str(service_args.port_in)])
+
+        with PreprocessorService(service_args), ZmqClient(client_args) as client:
+            msg = gnes_pb2.Message()
+
+            msg.request.index.docs.extend([gnes_pb2.Document() for _ in range(5)])
+            client.send_message(msg)
+            client.recv_message()
+
+            msg.request.train.docs.extend([gnes_pb2.Document() for _ in range(5)])
+            client.send_message(msg)
+            client.recv_message()
+
     def test_singleton_preprocessor_service_realdata(self):
         args = set_preprocessor_service_parser().parse_args([
             '--yaml_path', self.singleton_img_pre_yaml
@@ -118,3 +145,29 @@ def test_slidingwindow_preprocessor_service_realdata(self):
                     self.assertEqual(blob2array(d.chunks[0].blob).shape[0], 224)
                     self.assertEqual(blob2array(d.chunks[0].blob).shape[1], 224)
                     print(blob2array(d.chunks[0].blob).dtype)
+
+    def test_segmentation_preprocessor_service_realdata(self):
+        """Index the images in ``imgs/test.zip`` through the segmentation service.
+
+        For every reply the first route must name the segment preprocessor,
+        and the first chunk of every document must decode to a 224 x 224 x 3 array.
+        """
+        args = set_preprocessor_service_parser().parse_args([
+            '--yaml_path', self.segmentation_img_pre_yaml
+        ])
+
+        c_args = _set_client_parser().parse_args([
+            '--port_in', str(args.port_out),
+            '--port_out', str(args.port_in)
+        ])
+        # Read every member up front; the context manager closes the archive
+        # and ``ZipFile.read`` leaves no per-member handle open.
+        with zipfile.ZipFile(os.path.join(self.dirname, 'imgs/test.zip')) as all_zips:
+            all_bytes = [all_zips.read(v) for v in all_zips.namelist()]
+
+        with PreprocessorService(args), ZmqClient(c_args) as client:
+            for req in RequestGenerator.index(all_bytes):
+                msg = gnes_pb2.Message()
+                msg.request.index.CopyFrom(req.index)
+                client.send_message(msg)
+                r = client.recv_message()
+                self.assertEqual(r.envelope.routes[0].service, 'PreprocessorService:SegmentPreprocessor')
+                for d in r.request.index.docs:
+                    # Decode the blob once instead of once per assertion.
+                    first_chunk = blob2array(d.chunks[0].blob)
+                    self.assertEqual(len(first_chunk.shape), 3)
+                    self.assertEqual(first_chunk.shape[-1], 3)
+                    self.assertEqual(first_chunk.shape[0], 224)
+                    self.assertEqual(first_chunk.shape[1], 224)
+                    print(first_chunk.dtype)
diff --git a/tests/yaml/base-segmentation-image-prep.yml b/tests/yaml/base-segmentation-image-prep.yml
@@ -0,0 +1,8 @@
+!SegmentPreprocessor
+parameter:
+  model_dir: ${FASTERRCNN_MODEL}
+  model_name: fasterrcnn_resnet50_fpn
+  target_img_size: 224
+  _use_cuda: false
+property:
+  is_trained: true
diff --git a/tests/yaml/mobilenet-encoder.yml b/tests/yaml/mobilenet-encoder.yml
@@ -0,0 +1,9 @@
+!BasePytorchEncoder
+parameter:
+  model_dir: ${MOBILENET_MODEL}
+  model_name: mobilenet_v2
+  layers:
+    - features
+    - x.mean([2, 3])
+property:
+  is_trained: true