Add a Faster R-CNN based SegmentPreprocessor, move the sliding-window image
preprocessors to image/slidingWindow.py and add a MobileNet encoder test.

Review notes:
  * gnes/preprocessor/__init__.py now maps the two sliding-window classes to
    image.slidingWindow, because this patch renames image/simple.py.
  * SegmentPreprocessor always defines its device, moves the input tensor to
    it, converts the input image to RGB and runs inference under no_grad.
  * Files created by this patch end with a newline.
  * The "index" lines were not recomputed after the review changes.

diff --git a/docker-compose/component/image_preprocessor.yml b/docker-compose/component/image_preprocessor.yml
deleted file mode 100644
index e278e7fb..00000000
--- a/docker-compose/component/image_preprocessor.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-!ImagePreprocessor
-parameter:
-  start_doc_id: 0
-  random_doc_id: True
-  use_split: True
-  split_method: 'stride'
-  is_rgb: True
-property:
-  is_trained: true
\ No newline at end of file
diff --git a/docker-compose/component/img_preprocessor_fasterRCNN.yml b/docker-compose/component/img_preprocessor_fasterRCNN.yml
new file mode 100644
index 00000000..47049a88
--- /dev/null
+++ b/docker-compose/component/img_preprocessor_fasterRCNN.yml
@@ -0,0 +1,8 @@
+!SegmentPreprocessor
+parameter:
+  model_dir: /ext_data/image_preprocessor
+  model_name: fasterrcnn_resnet50_fpn
+  target_img_size: 224
+  _use_cuda: false
+property:
+  is_trained: true
diff --git a/docker-compose/component/img_preprocessor_singleton.yml b/docker-compose/component/img_preprocessor_singleton.yml
new file mode 100644
index 00000000..e5f03a49
--- /dev/null
+++ b/docker-compose/component/img_preprocessor_singleton.yml
@@ -0,0 +1,3 @@
+!BaseSingletonPreprocessor
+parameter:
+  doc_type: 2
diff --git a/docker-compose/component/img_preprocessor_vanilla_sldwin.yml b/docker-compose/component/img_preprocessor_vanilla_sldwin.yml
new file mode 100644
index 00000000..e24f474f
--- /dev/null
+++ b/docker-compose/component/img_preprocessor_vanilla_sldwin.yml
@@ -0,0 +1,6 @@
+!VanillaSlidingPreprocessor
+parameter:
+  window_size: 16
+  stride_height: 16
+  stride_wide: 16
+  target_img_size: 224
diff --git a/docker-compose/component/img_preprocessor_weight_sldwin.yml b/docker-compose/component/img_preprocessor_weight_sldwin.yml
new file mode 100644
index 00000000..a108a7fb
--- /dev/null
+++ b/docker-compose/component/img_preprocessor_weight_sldwin.yml
@@ -0,0 +1,6 @@
+!WeightedSlidingPreprocessor
+parameter:
+  window_size: 16
+  stride_height: 16
+  stride_wide: 16
+  target_img_size: 224
diff --git a/gnes/preprocessor/__init__.py b/gnes/preprocessor/__init__.py
index 7dafc751..e5b976d2 100644
--- a/gnes/preprocessor/__init__.py
+++ b/gnes/preprocessor/__init__.py
@@ -25,7 +25,7 @@
     'BaseTextPreprocessor': 'text.base',
-    'VanillaSlidingPreprocessor': 'image.simple',
-    'WeightedSlidingPreprocessor': 'image.simple',
-    'SegmentPreprocessor': 'image.simple',
+    'VanillaSlidingPreprocessor': 'image.slidingWindow',
+    'WeightedSlidingPreprocessor': 'image.slidingWindow',
+    'SegmentPreprocessor': 'image.segmentation',
     'BaseSingletonPreprocessor': 'base',
     'BaseVideoPreprocessor': 'video.base',
     'FFmpegPreprocessor': 'video.ffmpeg',
diff --git a/gnes/preprocessor/image/base.py b/gnes/preprocessor/image/base.py
index 50fc5372..f92dfb0a 100644
--- a/gnes/preprocessor/image/base.py
+++ b/gnes/preprocessor/image/base.py
@@ -29,4 +29,11 @@ def __init__(self,
         self.is_rgb = is_rgb
 
     def _get_all_chunks_weight(self, image_set: List['np.ndarray']) -> List[float]:
-        pass
\ No newline at end of file
+        pass
+
+    @classmethod
+    def _torch_transform(cls, image):
+        """Turn an image into a tensor and normalize each of its 3 channels with mean 0.5 and std 0.5."""
+        import torchvision.transforms as transforms
+        return transforms.Compose([transforms.ToTensor(),
+                                   transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])(image)
diff --git a/gnes/preprocessor/image/segmentation.py b/gnes/preprocessor/image/segmentation.py
new file mode 100644
index 00000000..530cf123
--- /dev/null
+++ b/gnes/preprocessor/image/segmentation.py
@@ -0,0 +1,67 @@
+from .base import BaseImagePreprocessor
+from ...proto import gnes_pb2, array2blob
+from PIL import Image
+import numpy as np
+import io
+import os
+
+
+class SegmentPreprocessor(BaseImagePreprocessor):
+    """Chunk an image document with a torchvision detection model.
+
+    Every bounding box returned by the detector is cropped out of the
+    original image, resized to ``target_img_size`` x ``target_img_size``
+    and stored as one chunk whose weight is the detection score.
+    """
+
+    def __init__(self, model_name: str,
+                 model_dir: str,
+                 target_img_size: int = 224,
+                 _use_cuda: bool = False,
+                 *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.model_name = model_name
+        self.model_dir = model_dir
+        self.target_img_size = target_img_size
+        self._use_cuda = _use_cuda
+
+    def post_init(self):
+        import torch
+        import torchvision.models as models
+
+        # pretrained weights are cached under TORCH_HOME
+        os.environ['TORCH_HOME'] = self.model_dir
+        self._model = getattr(models.detection, self.model_name)(pretrained=True)
+        self._model = self._model.eval()
+        # always define the device so that apply() can move its input onto it
+        use_cuda = self._use_cuda and torch.cuda.is_available()
+        self._device = torch.device('cuda:0' if use_cuda else 'cpu')
+        self._model = self._model.to(self._device)
+
+    def apply(self, doc: 'gnes_pb2.Document'):
+        import torch
+
+        super().apply(doc)
+        if doc.raw_bytes:
+            # the 3-channel normalization in _torch_transform needs RGB input
+            original_image = Image.open(io.BytesIO(doc.raw_bytes)).convert('RGB')
+            image_tensor = self._torch_transform(original_image).to(self._device)
+
+            # inference only, so skip building the autograd graph
+            with torch.no_grad():
+                seg_output = self._model([image_tensor])
+            chunks = seg_output[0]['boxes'].tolist()
+            weight = seg_output[0]['scores'].tolist()
+            for ci, (box, score) in enumerate(zip(chunks, weight)):
+                c = doc.chunks.add()
+                c.doc_id = doc.doc_id
+                c.blob.CopyFrom(array2blob(self._crop_image_reshape(original_image, box)))
+                c.offset_1d = ci
+                c.weight = score
+        else:
+            self.logger.error('bad document: "raw_bytes" is empty!')
+
+    def _crop_image_reshape(self, original_image, coordinates):
+        """Crop ``coordinates`` out of the image and resize the crop to a square array."""
+        return np.array(original_image.crop(coordinates).resize((self.target_img_size,
+                                                                 self.target_img_size)))
diff --git a/gnes/preprocessor/image/simple.py b/gnes/preprocessor/image/slidingWindow.py
similarity index 76%
rename from gnes/preprocessor/image/simple.py
rename to gnes/preprocessor/image/slidingWindow.py
index 83776ecb..4cccdd76 100644
--- a/gnes/preprocessor/image/simple.py
+++ b/gnes/preprocessor/image/slidingWindow.py
@@ -19,6 +19,7 @@
 from PIL import Image
 
 from .base import BaseImagePreprocessor
+from ..video.ffmpeg import FFmpegPreprocessor
 from ...proto import gnes_pb2, array2blob
 
 
@@ -27,19 +28,24 @@ class BaseSlidingPreprocessor(BaseImagePreprocessor):
     def __init__(self, window_size: int = 64,
                  stride_height: int = 64,
                  stride_wide: int = 64,
+                 target_img_size: int = 224,
                  *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.window_size = window_size
         self.stride_height = stride_height
         self.stride_wide = stride_wide
+        self.target_img_size = target_img_size
 
     def apply(self, doc: 'gnes_pb2.Document'):
         super().apply(doc)
         if doc.raw_bytes:
             img = np.array(Image.open(io.BytesIO(doc.raw_bytes)))
             image_set = self._get_all_sliding_window(img)
-            weight = self._get_all_chunks_weight(image_set)
-            for ci, ele in enumerate(zip(image_set, weight)):
+            normalized_image_set = [np.array(self._torch_transform(window)).transpose(1, 2, 0)
+                                    for window in image_set]
+            weight = self._get_all_chunks_weight(normalized_image_set)
+
+            for ci, ele in enumerate(zip(normalized_image_set, weight)):
                 c = doc.chunks.add()
                 c.doc_id = doc.doc_id
                 c.blob.CopyFrom(array2blob(ele[0]))
@@ -75,36 +81,16 @@ def _get_all_sliding_window(self, img: 'np.ndarray') -> List['np.ndarray']:
             writeable=False
         )
         expanded_input = expanded_input.reshape((-1, self.window_size, self.window_size, 3))
-        return [np.array(Image.fromarray(img).resize((self.target_img_size, self.target_img_size))) for img in
-                expanded_input]
+        return [np.array(Image.fromarray(img).resize((self.target_img_size, self.target_img_size))) for img in expanded_input]
 
 
 class VanillaSlidingPreprocessor(BaseSlidingPreprocessor):
 
-    def _get_all_chunks_weight(self, image_set) -> List[float]:
+    def _get_all_chunks_weight(self, image_set: List['np.ndarray']) -> List[float]:
         return [1 / len(image_set) for _ in range(len(image_set))]
 
 
 class WeightedSlidingPreprocessor(BaseSlidingPreprocessor):
 
-    def _get_all_chunks_weight(self, image_set) -> List[float]:
-        weight = np.zeros([len(image_set)])
-        # n_channel is usually 3 for RGB images
-        n_channel = image_set[0].shape[-1]
-        for i in range(len(image_set)):
-            # calcualte the variance of histgram of pixels
-            weight[i] = sum([np.histogram(image_set[i][:, :, _])[0].var()
-                             for _ in range(n_channel)])
-        weight = weight / weight.sum()
-
-        # normalized result
-        weight = np.exp(- weight * 10)
-        return weight / weight.sum()
-
-
-class SegmentPreprocessor(BaseImagePreprocessor):
-    def apply(self, doc: 'gnes_pb2.Document'):
-        raise NotImplementedError
-
     def _get_all_chunks_weight(self, image_set: List['np.ndarray']) -> List[float]:
-        raise NotImplementedError
+        return FFmpegPreprocessor.pic_weight(image_set)
diff --git a/tests/__init__.py b/tests/__init__.py
index dab044ea..00cbdf44 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -48,6 +48,8 @@ def line2pb_doc(line: str, doc_id: int = 0, deliminator: str = r'[.。!?!?]+
         'VGG_MODEL': '/',
         'RESNET_MODEL': '/',
         'INCEPTION_MODEL': '/',
+        'MOBILENET_MODEL': '/',
+        'FASTERRCNN_MODEL': '/',
         'GNES_PROFILING': 0
     },
     'idc-165': {
@@ -63,6 +65,8 @@ def line2pb_doc(line: str, doc_id: int = 0, deliminator: str = r'[.。!?!?]+
         'VGG_MODEL': '/ext_data/image_encoder',
         'RESNET_MODEL': '/ext_data/image_encoder',
         'INCEPTION_MODEL': '/ext_data/image_encoder',
+        'MOBILENET_MODEL': '/ext_data/image_encoder',
+        'FASTERRCNN_MODEL': '/ext_data/image_preprocessor',
         'GNES_PROFILING': 0
     }
 
diff --git a/tests/test_image_encoder.py b/tests/test_image_encoder.py
index 3b282901..23da2760 100644
--- a/tests/test_image_encoder.py
+++ b/tests/test_image_encoder.py
@@ -5,7 +5,7 @@
 import numpy as np
 
 from gnes.encoder.image.base import BasePytorchEncoder
-from gnes.preprocessor.image.simple import VanillaSlidingPreprocessor
+from gnes.preprocessor.image.slidingWindow import VanillaSlidingPreprocessor
 from gnes.preprocessor.base import BaseSingletonPreprocessor
 from gnes.proto import gnes_pb2, blob2array
 
@@ -39,6 +39,7 @@ def setUp(self):
         self.vgg_yaml = os.path.join(dirname, 'yaml', 'vgg-encoder.yml')
         self.res_yaml = os.path.join(dirname, 'yaml', 'resnet-encoder.yml')
         self.inception_yaml = os.path.join(dirname, 'yaml', 'inception-encoder.yml')
+        self.mobilenet_yaml = os.path.join(dirname, 'yaml', 'mobilenet-encoder.yml')
 
     def test_vgg_encoding(self):
         self.encoder = BasePytorchEncoder.load_yaml(self.vgg_yaml)
@@ -64,6 +65,14 @@ def test_inception_encoding(self):
             self.assertEqual(vec.shape[0], len(test_img))
             self.assertEqual(vec.shape[1], 2048)
 
+    def test_mobilenet_encoding(self):
+        self.encoder = BasePytorchEncoder.load_yaml(self.mobilenet_yaml)
+        for test_img in self.test_img:
+            vec = self.encoder.encode(test_img)
+            print("the length of data now is:", len(test_img))
+            self.assertEqual(vec.shape[0], len(test_img))
+            self.assertEqual(vec.shape[1], 1280)
+
     def test_dump_load(self):
         self.encoder = BasePytorchEncoder.load_yaml(self.vgg_yaml)
 
diff --git a/tests/test_image_preprocessor.py b/tests/test_image_preprocessor.py
index bfe84009..7ee724dc 100644
--- a/tests/test_image_preprocessor.py
+++ b/tests/test_image_preprocessor.py
@@ -14,6 +14,7 @@ def setUp(self):
         self.dirname = os.path.dirname(__file__)
         self.singleton_img_pre_yaml = os.path.join(self.dirname, 'yaml', 'base-singleton-image-prep.yml')
         self.slidingwindow_img_pre_yaml = os.path.join(self.dirname, 'yaml', 'base-vanilla_sldwin-image-prep.yml')
+        self.segmentation_img_pre_yaml = os.path.join(self.dirname, 'yaml', 'base-segmentation-image-prep.yml')
 
     def test_singleton_preprocessor_service_empty(self):
         args = set_preprocessor_service_parser().parse_args([
@@ -29,6 +30,13 @@ def test_slidingwindow_preprocessor_service_empty(self):
         with PreprocessorService(args):
             pass
 
+    def test_segmentation_preprocessor_service_empty(self):
+        args = set_preprocessor_service_parser().parse_args([
+            '--yaml_path', self.segmentation_img_pre_yaml
+        ])
+        with PreprocessorService(args):
+            pass
+
     def test_singleton_preprocessor_service_echo(self):
         args = set_preprocessor_service_parser().parse_args([
             '--yaml_path', self.singleton_img_pre_yaml
@@ -67,6 +75,25 @@ def test_slidingwindow_preprocessor_service_echo(self):
             r = client.recv_message()
             # print(r)
 
+    def test_segmentation_preprocessor_service_echo(self):
+        args = set_preprocessor_service_parser().parse_args([
+            '--yaml_path', self.segmentation_img_pre_yaml
+        ])
+        c_args = _set_client_parser().parse_args([
+            '--port_in', str(args.port_out),
+            '--port_out', str(args.port_in)
+        ])
+        with PreprocessorService(args), ZmqClient(c_args) as client:
+            msg = gnes_pb2.Message()
+            msg.request.index.docs.extend([gnes_pb2.Document() for _ in range(5)])
+            client.send_message(msg)
+            r = client.recv_message()
+            # print(r)
+            msg.request.train.docs.extend([gnes_pb2.Document() for _ in range(5)])
+            client.send_message(msg)
+            r = client.recv_message()
+            # print(r)
+
     def test_singleton_preprocessor_service_realdata(self):
         args = set_preprocessor_service_parser().parse_args([
             '--yaml_path', self.singleton_img_pre_yaml
@@ -118,3 +145,29 @@ def test_slidingwindow_preprocessor_service_realdata(self):
                     self.assertEqual(blob2array(d.chunks[0].blob).shape[0], 224)
                     self.assertEqual(blob2array(d.chunks[0].blob).shape[1], 224)
                     print(blob2array(d.chunks[0].blob).dtype)
+
+    def test_segmentation_preprocessor_service_realdata(self):
+        args = set_preprocessor_service_parser().parse_args([
+            '--yaml_path', self.segmentation_img_pre_yaml
+        ])
+
+        c_args = _set_client_parser().parse_args([
+            '--port_in', str(args.port_out),
+            '--port_out', str(args.port_in)
+        ])
+        with zipfile.ZipFile(os.path.join(self.dirname, 'imgs/test.zip')) as all_zips:
+            all_bytes = [all_zips.open(v).read() for v in all_zips.namelist()]
+
+        with PreprocessorService(args), ZmqClient(c_args) as client:
+            for req in RequestGenerator.index(all_bytes):
+                msg = gnes_pb2.Message()
+                msg.request.index.CopyFrom(req.index)
+                client.send_message(msg)
+                r = client.recv_message()
+                self.assertEqual(r.envelope.routes[0].service, 'PreprocessorService:SegmentPreprocessor')
+                for d in r.request.index.docs:
+                    self.assertEqual(len(blob2array(d.chunks[0].blob).shape), 3)
+                    self.assertEqual(blob2array(d.chunks[0].blob).shape[-1], 3)
+                    self.assertEqual(blob2array(d.chunks[0].blob).shape[0], 224)
+                    self.assertEqual(blob2array(d.chunks[0].blob).shape[1], 224)
+                    print(blob2array(d.chunks[0].blob).dtype)
diff --git a/tests/yaml/base-segmentation-image-prep.yml b/tests/yaml/base-segmentation-image-prep.yml
new file mode 100644
index 00000000..8fc57e42
--- /dev/null
+++ b/tests/yaml/base-segmentation-image-prep.yml
@@ -0,0 +1,8 @@
+!SegmentPreprocessor
+parameter:
+  model_dir: ${FASTERRCNN_MODEL}
+  model_name: fasterrcnn_resnet50_fpn
+  target_img_size: 224
+  _use_cuda: false
+property:
+  is_trained: true
diff --git a/tests/yaml/mobilenet-encoder.yml b/tests/yaml/mobilenet-encoder.yml
new file mode 100644
index 00000000..38f45faf
--- /dev/null
+++ b/tests/yaml/mobilenet-encoder.yml
@@ -0,0 +1,9 @@
+!BasePytorchEncoder
+parameter:
+  model_dir: ${MOBILENET_MODEL}
+  model_name: mobilenet_v2
+  layers:
+    - features
+    - x.mean([2, 3])
+property:
+  is_trained: true