diff --git a/gnes/base/__init__.py b/gnes/base/__init__.py index 872201b0..5945fd23 100644 --- a/gnes/base/__init__.py +++ b/gnes/base/__init__.py @@ -22,13 +22,13 @@ import tempfile import uuid from functools import wraps -from typing import Dict, Any, Union, TextIO, TypeVar, Type +from typing import Dict, Any, Union, TextIO, TypeVar, Type, List, Callable import ruamel.yaml.constructor from ..helper import set_logger, profiling, yaml, parse_arg, load_contrib_module -__all__ = ['TrainableBase'] +__all__ = ['TrainableBase', 'CompositionalTrainableBase'] T = TypeVar('T', bound='TrainableBase') @@ -295,7 +295,7 @@ def _get_instance_from_yaml(cls, constructor, node, stop_on_import_error=False): if stop_on_import_error: raise RuntimeError('Cannot import module, pip install may required') from ex - if node.tag in {'!PipelineEncoder', '!CompositionalEncoder'}: + if node.tag in {'!PipelineEncoder', '!CompositionalTrainableBase'}: os.environ['GNES_WARN_UNNAMED_COMPONENT'] = '0' data = ruamel.yaml.constructor.SafeConstructor.construct_mapping( @@ -325,7 +325,7 @@ def _get_instance_from_yaml(cls, constructor, node, stop_on_import_error=False): obj.logger.info('initialize %s from a yaml config' % cls.__name__) cls.init_from_yaml = False - if node.tag in {'!PipelineEncoder', '!CompositionalEncoder'}: + if node.tag in {'!PipelineEncoder', '!CompositionalTrainableBase'}: os.environ['GNES_WARN_UNNAMED_COMPONENT'] = '1' return obj, data, load_from_dump @@ -355,3 +355,69 @@ def _dump_instance_to_yaml(data): if p: r['gnes_config'] = p return r + + def _copy_from(self, x: 'TrainableBase') -> None: + pass + + +class CompositionalTrainableBase(TrainableBase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._component = None # type: List[T] + + @property + def component(self) -> Union[List[T], Dict[str, T]]: + return self._component + + @property + def is_pipeline(self): + return isinstance(self.component, list) + + @component.setter + def component(self, comps: Callable[[], Union[list, dict]]): + if not callable(comps): + raise TypeError('component must be a callable function that returns ' + 'a List[BaseEncoder]') + if not getattr(self, 'init_from_yaml', False): + self._component = comps() + else: + self.logger.info('component is omitted from construction, ' + 'as it is initialized from yaml config') + + def close(self): + super().close() + # pipeline + if isinstance(self.component, list): + for be in self.component: + be.close() + # no typology + elif isinstance(self.component, dict): + for be in self.component.values(): + be.close() + elif self.component is None: + pass + else: + raise TypeError('component must be dict or list, received %s' % type(self.component)) + + def _copy_from(self, x: T): + if isinstance(self.component, list): + for be1, be2 in zip(self.component, x.component): + be1._copy_from(be2) + elif isinstance(self.component, dict): + for k, v in self.component.items(): + v._copy_from(x.component[k]) + else: + raise TypeError('component must be dict or list, received %s' % type(self.component)) + + @classmethod + def to_yaml(cls, representer, data): + tmp = super()._dump_instance_to_yaml(data) + tmp['component'] = data.component + return representer.represent_mapping('!' + cls.__name__, tmp) + + @classmethod + def from_yaml(cls, constructor, node): + obj, data, from_dump = super()._get_instance_from_yaml(constructor, node) + if not from_dump and 'component' in data: + obj.component = lambda: data['component'] + return obj diff --git a/gnes/cli/parser.py b/gnes/cli/parser.py index 7dab3078..154b70e8 100644 --- a/gnes/cli/parser.py +++ b/gnes/cli/parser.py @@ -54,7 +54,6 @@ def set_composer_parser(parser=None): 'gnes', '/'.join(('resources', 'config', 'compose', 'default.yml'))), help='yaml config of the service') parser.add_argument('--html_path', type=argparse.FileType('w', encoding='utf8'), - default='./gnes-board.html', help='output path of the HTML file, will contain all possible generations') parser.add_argument('--shell_path', type=argparse.FileType('w', encoding='utf8'), help='output path of the shell-based starting script') @@ -214,10 +213,11 @@ def set_grpc_frontend_parser(parser=None): from ..service.base import SocketType if not parser: parser = set_base_parser() - _set_client_parser(parser) + set_service_parser(parser) _set_grpc_parser(parser) parser.set_defaults(socket_in=SocketType.PULL_BIND, - socket_out=SocketType.PUSH_BIND) + socket_out=SocketType.PUSH_BIND, + read_only=True) parser.add_argument('--max_concurrency', type=int, default=10, help='maximum concurrent client allowed') parser.add_argument('--max_send_size', type=int, default=100, diff --git a/gnes/composer/base.py b/gnes/composer/base.py index 41e77f23..e6fe7137 100644 --- a/gnes/composer/base.py +++ b/gnes/composer/base.py @@ -156,10 +156,24 @@ def build_layers(self) -> List['YamlComposer.Layer']: last_layer = self._layers[idx - 1] for l in self._add_router(last_layer, layer): all_layers.append(copy.deepcopy(l)) - # # add frontend - # for l in self._add_router(all_layers[-1], all_layers[0]): - # all_layers.append(copy.deepcopy(l)) all_layers[0] = copy.deepcopy(self._layers[0]) + + # gRPCfrontend should always on the bind role + assert all_layers[0].is_single_component + assert all_layers[0].components[0]['name'] == 'gRPCFrontend' + + if all_layers[0].components[0]['socket_in'] == str(SocketType.SUB_CONNECT): + # change to sub bind + all_layers[0].components[0]['socket_in'] = str(SocketType.SUB_BIND) + for c in all_layers[-1].components: + c['socket_out'] = str(SocketType.PUB_CONNECT) + + if all_layers[0].components[0]['socket_in'] == str(SocketType.PULL_CONNECT): + # change to sub bind + all_layers[0].components[0]['socket_in'] = str(SocketType.PULL_BIND) + for c in all_layers[-1].components: + c['socket_out'] = str(SocketType.PUSH_CONNECT) + return all_layers @staticmethod @@ -292,11 +306,11 @@ def build_mermaid(all_layers: List['YamlComposer.Layer'], mermaid_leftright: boo # if len(last_layer.components) > 1: # self.mermaid_graph.append('\tend') - style = ['classDef gRPCFrontendCLS fill:#FFAA04,stroke:#277CE8,stroke-width:1px;', - 'classDef EncoderCLS fill:#27E1E8,stroke:#277CE8,stroke-width:1px;', - 'classDef IndexerCLS fill:#27E1E8,stroke:#277CE8,stroke-width:1px;', - 'classDef RouterCLS fill:#2BFFCB,stroke:#277CE8,stroke-width:1px;', - 'classDef PreprocessorCLS fill:#27E1E8,stroke:#277CE8,stroke-width:1px;'] + style = ['classDef gRPCFrontendCLS fill:#FFE0E0,stroke:#FFE0E0,stroke-width:1px;', + 'classDef EncoderCLS fill:#FFDAAF,stroke:#FFDAAF,stroke-width:1px;', + 'classDef IndexerCLS fill:#FFFBC1,stroke:#FFFBC1,stroke-width:1px;', + 'classDef RouterCLS fill:#C9E8D2,stroke:#C9E8D2,stroke-width:1px;', + 'classDef PreprocessorCLS fill:#CEEEEF,stroke:#CEEEEF,stroke-width:1px;'] class_def = ['class %s %s;' % (','.join(v), k) for k, v in cls_dict.items()] mermaid_str = '\n'.join( ['graph %s' % ('LR' if mermaid_leftright else 'TD')] + mermaid_graph + style + class_def) diff --git a/gnes/composer/flask.py b/gnes/composer/flask.py index 36003967..fe132c11 100644 --- a/gnes/composer/flask.py +++ b/gnes/composer/flask.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -import tempfile +import io from .base import YamlComposer from ..cli.parser import set_composer_parser @@ -37,23 +36,25 @@ def _create_flask_app(self): # support up to 10 concurrent HTTP requests app = Flask(__name__) + args = set_composer_parser().parse_args([]) + default_html = YamlComposer(args).build_all()['html'] @app.route('/', methods=['GET']) def _get_homepage(): - return YamlComposer(set_composer_parser().parse_args([])).build_all()['html'] + return default_html @app.route('/generate', methods=['POST']) def _regenerate(): data = request.form if request.form else request.json if not data or 'yaml-config' not in data: return '

Bad POST request

your POST request does not contain "yaml-config" field!', 406 - f = tempfile.NamedTemporaryFile('w', delete=False).name - with open(f, 'w', encoding='utf8') as fp: - fp.write(data['yaml-config']) try: - return YamlComposer(set_composer_parser().parse_args([ - '--yaml_path', f - ])).build_all()['html'] + args.yaml_path = io.StringIO(data['yaml-config']) + if data.get('mermaid_direction', 'top-down').lower() == 'left-right': + args.mermaid_leftright = True + if 'docker-image' in data: + args.docker_img = data['docker-image'] + return YamlComposer(args).build_all()['html'] except Exception as e: self.logger.error(e) return '

Bad YAML input

please kindly check the format, indent and content of your YAML file!', 400 diff --git a/gnes/encoder/__init__.py b/gnes/encoder/__init__.py index 94dbeefe..70c9ae44 100644 --- a/gnes/encoder/__init__.py +++ b/gnes/encoder/__init__.py @@ -36,7 +36,6 @@ 'BaseTextEncoder': 'base', 'BaseVideoEncoder': 'base', 'BaseNumericEncoder': 'base', - 'CompositionalEncoder': 'base', 'PipelineEncoder': 'base', 'HashEncoder': 'numeric.hash', 'BasePytorchEncoder': 'image.base', diff --git a/gnes/encoder/base.py b/gnes/encoder/base.py index d21ad3aa..d2671115 100644 --- a/gnes/encoder/base.py +++ b/gnes/encoder/base.py @@ -16,11 +16,11 @@ # pylint: disable=low-comment-ratio -from typing import List, Any, Union, Dict, Callable +from typing import List, Any import numpy as np -from ..base import TrainableBase +from ..base import TrainableBase, CompositionalTrainableBase class BaseEncoder(TrainableBase): @@ -64,70 +64,7 @@ def encode(self, data: np.ndarray, *args, **kwargs) -> bytes: return data.tobytes() -class CompositionalEncoder(BaseEncoder): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._component = None # type: List['BaseEncoder'] - - @property - def component(self) -> Union[List['BaseEncoder'], Dict[str, 'BaseEncoder']]: - return self._component - - @property - def is_pipeline(self): - return isinstance(self.component, list) - - @component.setter - def component(self, comps: Callable[[], Union[list, dict]]): - if not callable(comps): - raise TypeError('component must be a callable function that returns ' - 'a List[BaseEncoder]') - if not getattr(self, 'init_from_yaml', False): - self._component = comps() - else: - self.logger.info('component is omitted from construction, ' - 'as it is initialized from yaml config') - - def close(self): - super().close() - # pipeline - if isinstance(self.component, list): - for be in self.component: - be.close() - # no typology - elif isinstance(self.component, dict): - for be in self.component.values(): - be.close() - elif self.component is None: - pass - else: - raise TypeError('component must be dict or list, received %s' % type(self.component)) - - def _copy_from(self, x: 'CompositionalEncoder'): - if isinstance(self.component, list): - for be1, be2 in zip(self.component, x.component): - be1._copy_from(be2) - elif isinstance(self.component, dict): - for k, v in self.component.items(): - v._copy_from(x.component[k]) - else: - raise TypeError('component must be dict or list, received %s' % type(self.component)) - - @classmethod - def to_yaml(cls, representer, data): - tmp = super()._dump_instance_to_yaml(data) - tmp['component'] = data.component - return representer.represent_mapping('!' + cls.__name__, tmp) - - @classmethod - def from_yaml(cls, constructor, node): - obj, data, from_dump = super()._get_instance_from_yaml(constructor, node) - if not from_dump and 'component' in data: - obj.component = lambda: data['component'] - return obj - - -class PipelineEncoder(CompositionalEncoder): +class PipelineEncoder(CompositionalTrainableBase): def encode(self, data: Any, *args, **kwargs) -> Any: if not self.component: raise NotImplementedError diff --git a/gnes/encoder/text/bert.py b/gnes/encoder/text/bert.py index 0bb1997f..9003a25d 100644 --- a/gnes/encoder/text/bert.py +++ b/gnes/encoder/text/bert.py @@ -20,7 +20,7 @@ import numpy as np -from ..base import CompositionalEncoder, BaseTextEncoder +from ..base import CompositionalTrainableBase, BaseTextEncoder from ...helper import batching @@ -45,7 +45,7 @@ def close(self): self.bc_encoder.close() -class BertEncoderWithServer(CompositionalEncoder): +class BertEncoderWithServer(CompositionalTrainableBase): def encode(self, text: List[str], *args, **kwargs) -> np.ndarray: return self.component['bert_client'].encode(text, *args, **kwargs) diff --git a/gnes/helper.py b/gnes/helper.py index 7e69b0bf..f732ca68 100644 --- a/gnes/helper.py +++ b/gnes/helper.py @@ -194,6 +194,7 @@ class ColoredFormatter(Formatter): 'WARNING': dict(color='red', on_color='on_yellow'), # yellow 'ERROR': dict(color='white', on_color='on_red'), # 31 for red 'CRITICAL': dict(color='red', on_color='on_white'), # white on red bg + 'SUCCESS': dict(color='white', on_color='on_green'), # green } PREFIX = '\033[' @@ -535,4 +536,3 @@ def load_contrib_module(): profiling = time_profile yaml = _get_yaml() - diff --git a/gnes/indexer/base.py b/gnes/indexer/base.py index 57d95c8e..91cdd613 100644 --- a/gnes/indexer/base.py +++ b/gnes/indexer/base.py @@ -19,8 +19,7 @@ import numpy as np -from ..base import TrainableBase -from ..encoder.base import CompositionalEncoder +from ..base import TrainableBase, CompositionalTrainableBase class BaseIndexer(TrainableBase): @@ -71,7 +70,7 @@ def normalize_score(self, *args, **kwargs): pass -class JointIndexer(CompositionalEncoder): +class JointIndexer(CompositionalTrainableBase): @property def component(self): diff --git a/gnes/preprocessor/__init__.py b/gnes/preprocessor/__init__.py index b248de3d..b54b6f29 100644 --- a/gnes/preprocessor/__init__.py +++ b/gnes/preprocessor/__init__.py @@ -20,6 +20,7 @@ _cls2file_map = { 'BasePreprocessor': 'base', + 'PipelinePreprocessor': 'base', 'TextPreprocessor': 'text.simple', 'BaseImagePreprocessor': 'image.base', 'BaseTextPreprocessor': 'text.base', @@ -28,6 +29,7 @@ 'WeightedSlidingPreprocessor': 'image.sliding_window', 'SegmentPreprocessor': 'image.segmentation', 'BaseUnaryPreprocessor': 'base', + 'ResizeChunkPreprocessor': 'image.resize', 'BaseVideoPreprocessor': 'video.base', 'FFmpegPreprocessor': 'video.ffmpeg', 'FFmpegVideoSegmentor': 'video.ffmpeg', diff --git a/gnes/preprocessor/base.py b/gnes/preprocessor/base.py index f2d0e61d..01952112 100644 --- a/gnes/preprocessor/base.py +++ b/gnes/preprocessor/base.py @@ -21,29 +21,51 @@ import numpy as np from PIL import Image -from ..base import TrainableBase +from ..base import TrainableBase, CompositionalTrainableBase from ..proto import gnes_pb2, array2blob class BasePreprocessor(TrainableBase): doc_type = gnes_pb2.Document.UNKNOWN - def __init__(self, start_doc_id: int = 0, random_doc_id: bool = True, *args, **kwargs): + def __init__(self, start_doc_id: int = 0, + random_doc_id: bool = True, + uniform_doc_weight: bool = True, + *args, **kwargs): super().__init__(*args, **kwargs) self.start_doc_id = start_doc_id self.random_doc_id = random_doc_id + self.uniform_doc_weight = uniform_doc_weight def apply(self, doc: 'gnes_pb2.Document') -> None: doc.doc_id = self.start_doc_id if not self.random_doc_id else random.randint(0, ctypes.c_uint(-1).value) doc.doc_type = self.doc_type + if not doc.weight and self.uniform_doc_weight: + doc.weight = 1.0 + self.start_doc_id += 1 + + +class PipelinePreprocessor(CompositionalTrainableBase): + def apply(self, doc: 'gnes_pb2.Document') -> None: + if not self.component: + raise NotImplementedError + for be in self.component: + be.apply(doc) + + def train(self, data, *args, **kwargs): + if not self.component: + raise NotImplementedError + for idx, be in enumerate(self.component): + be.train(data, *args, **kwargs) + if idx + 1 < len(self.component): + data = be.apply(data, *args, **kwargs) class BaseUnaryPreprocessor(BasePreprocessor): + is_trained = True def __init__(self, doc_type: int, *args, **kwargs): super().__init__(*args, **kwargs) - self.target_img_size = 224 - self.is_trained = True self.doc_type = doc_type def apply(self, doc: 'gnes_pb2.Document'): @@ -62,7 +84,6 @@ def raw_to_chunk(self, chunk: 'gnes_pb2.Chunk', raw_bytes: bytes): chunk.text = raw_bytes.decode() elif self.doc_type == gnes_pb2.Document.IMAGE: img = np.array(Image.open(io.BytesIO(raw_bytes))) - img = np.array(Image.fromarray(img).resize((self.target_img_size, self.target_img_size))) chunk.blob.CopyFrom(array2blob(img)) elif self.doc_type == gnes_pb2.Document.VIDEO: raise NotImplementedError diff --git a/gnes/preprocessor/image/base.py b/gnes/preprocessor/image/base.py index a557cbe9..783b2eb7 100644 --- a/gnes/preprocessor/image/base.py +++ b/gnes/preprocessor/image/base.py @@ -46,4 +46,4 @@ def _get_all_subarea(image): index = [[x, y, x + 1, y + 1] for [x, y] in product(range(len(x_list) - 1), range(len(y_list) - 1))] all_subareas = [[x_list[idx[0]], y_list[idx[1]], x_list[idx[2]], y_list[idx[3]]] for idx in index] - return all_subareas, index \ No newline at end of file + return all_subareas, index diff --git a/gnes/preprocessor/image/resize.py b/gnes/preprocessor/image/resize.py new file mode 100644 index 00000000..34846c1a --- /dev/null +++ b/gnes/preprocessor/image/resize.py @@ -0,0 +1,38 @@ +# Tencent is pleased to support the open source community by making GNES available. +# +# Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from PIL import Image + +from .base import BaseImagePreprocessor +from ...proto import gnes_pb2, blob2array, array2blob + + +class ResizeChunkPreprocessor(BaseImagePreprocessor): + + def __init__(self, + target_width: int = 224, + target_height: int = 224, + *args, **kwargs): + super().__init__(*args, **kwargs) + self.target_width = target_width + self.target_height = target_height + + def apply(self, doc: 'gnes_pb2.Document') -> None: + super().apply(doc) + for c in doc.chunks: + img = blob2array(c.blob) + img = np.array(Image.fromarray(img).resize((self.target_width, self.target_height))) + c.blob.CopyFrom(array2blob(img)) diff --git a/gnes/preprocessor/image/segmentation.py b/gnes/preprocessor/image/segmentation.py index b5a6392f..d66978c8 100644 --- a/gnes/preprocessor/image/segmentation.py +++ b/gnes/preprocessor/image/segmentation.py @@ -28,13 +28,11 @@ class SegmentPreprocessor(BaseImagePreprocessor): def __init__(self, model_name: str, model_dir: str, - target_img_size: int = 224, _use_cuda: bool = False, *args, **kwargs): super().__init__(*args, **kwargs) self.model_name = model_name self.model_dir = model_dir - self.target_img_size = target_img_size self._use_cuda = _use_cuda def post_init(self): @@ -68,7 +66,7 @@ def apply(self, doc: 'gnes_pb2.Document'): for ci, ele in enumerate(zip(chunks, weight)): c = doc.chunks.add() c.doc_id = doc.doc_id - c.blob.CopyFrom(array2blob(self._crop_image_reshape(original_image, ele[0]))) + c.blob.CopyFrom(array2blob(self._crop_resize(original_image, ele[0]))) c.offset_1d = ci c.offset_nd.x.extend(self._get_seg_offset_nd(all_subareas, index, ele[0])) c.weight = self._cal_area(ele[0]) / (original_image.size[0] * original_image.size[1]) @@ -83,7 +81,7 @@ def apply(self, doc: 'gnes_pb2.Document'): else: self.logger.error('bad document: "raw_bytes" is empty!') - def _crop_image_reshape(self, original_image, coordinates): + def _crop_resize(self, original_image, coordinates): return np.array(original_image.crop(coordinates).resize((self.target_img_size, self.target_img_size))) diff --git a/gnes/preprocessor/image/sliding_window.py b/gnes/preprocessor/image/sliding_window.py index bef92f53..63cadeaf 100644 --- a/gnes/preprocessor/image/sliding_window.py +++ b/gnes/preprocessor/image/sliding_window.py @@ -29,13 +29,11 @@ class BaseSlidingPreprocessor(BaseImagePreprocessor): def __init__(self, window_size: int = 64, stride_height: int = 64, stride_wide: int = 64, - target_img_size: int = 224, *args, **kwargs): super().__init__(*args, **kwargs) self.window_size = window_size self.stride_height = stride_height self.stride_wide = stride_wide - self.target_img_size = target_img_size def apply(self, doc: 'gnes_pb2.Document'): super().apply(doc) @@ -100,7 +98,7 @@ def _get_slid_offset_nd(self, all_subareas: List[List[int]], index: List[List[in @staticmethod def _get_location(all_subareas: List[List[int]], center_point: List[float]) -> List[bool]: location_list = [] - x_boundary = max([x[1] for x in all_subareas]) + x_boundary = max([x[2] for x in all_subareas]) y_boundary = max([y[3] for y in all_subareas]) for area in all_subareas: if center_point[0] in range(int(area[0]), int(area[2])) and center_point[1] in range(int(area[1]), @@ -112,6 +110,8 @@ def _get_location(all_subareas: List[List[int]], center_point: List[float]) -> L location_list.append(True) else: location_list.append(False) + if True not in location_list: + location_list[-1] = True return location_list diff --git a/gnes/preprocessor/video/ffmpeg.py b/gnes/preprocessor/video/ffmpeg.py index 30dca7a1..8f234a57 100644 --- a/gnes/preprocessor/video/ffmpeg.py +++ b/gnes/preprocessor/video/ffmpeg.py @@ -25,7 +25,7 @@ class FFmpegPreprocessor(BaseVideoPreprocessor): def __init__(self, - frame_size: str = "192*168", + frame_size: str = '192*168', duplicate_rm: bool = True, use_phash_weight: bool = False, phash_thresh: int = 5, @@ -48,8 +48,8 @@ def apply(self, doc: 'gnes_pb2.Document') -> None: frames = get_video_frames( doc.raw_bytes, s=self.frame_size, - vsync=self._ffmpeg_kwargs.get("vsync", "vfr"), - vf=self._ffmpeg_kwargs.get("vf", "select=eq(pict_type\\,I)")) + vsync=self._ffmpeg_kwargs.get('vsync', 'vfr'), + vf=self._ffmpeg_kwargs.get('vf', 'select=eq(pict_type\\,I)')) # remove dupliated key frames by phash value if self.duplicate_rm: diff --git a/gnes/preprocessor/video/shotdetect.py b/gnes/preprocessor/video/shotdetect.py index 377d8ee5..c1fdca80 100644 --- a/gnes/preprocessor/video/shotdetect.py +++ b/gnes/preprocessor/video/shotdetect.py @@ -26,9 +26,9 @@ class ShotDetectPreprocessor(BaseVideoPreprocessor): store_args_kwargs = True def __init__(self, - frame_size: str = "192*168", - descriptor: str = "block_hsv_histogram", - distance_metric: str = "bhattacharya", + frame_size: str = '192*168', + descriptor: str = 'block_hsv_histogram', + distance_metric: str = 'bhattacharya', *args, **kwargs): super().__init__(*args, **kwargs) @@ -47,7 +47,7 @@ def apply(self, doc: 'gnes_pb2.Document') -> None: frames = get_video_frames( doc.raw_bytes, s=self.frame_size, - vsync="vfr", + vsync='vfr', vf='select=eq(pict_type\\,I)') descriptors = [] diff --git a/gnes/resources/compose/gnes-board.html b/gnes/resources/compose/gnes-board.html index 4a0f8406..e054a9ad 100644 --- a/gnes/resources/compose/gnes-board.html +++ b/gnes/resources/compose/gnes-board.html @@ -123,18 +123,22 @@ } GNES Board - + - + - + - + @@ -215,25 +219,36 @@ YAML config
-
+
- +
- - -
+
+
+ + +
+
+ + +
diff --git a/gnes/service/base.py b/gnes/service/base.py index bfee42d2..be9972cc 100644 --- a/gnes/service/base.py +++ b/gnes/service/base.py @@ -264,7 +264,7 @@ def _run(self, ctx): self.is_ready.set() self.is_event_loop.set() self._start_auto_dump() - self.logger.info('ready and listening') + self.logger.info(colored('ready and listening', color='white', on_color='on_green')) while self.is_event_loop.is_set(): pull_sock = None socks = dict(poller.poll()) diff --git a/gnes/service/grpc.py b/gnes/service/grpc.py index 4745e0b8..f89fdf22 100644 --- a/gnes/service/grpc.py +++ b/gnes/service/grpc.py @@ -106,7 +106,7 @@ def add_envelope(self, body: 'gnes_pb2.Request', zmq_client: 'ZmqClient'): msg.envelope.num_part.append(1) msg.envelope.timeout = 5000 r = msg.envelope.routes.add() - r.service = zmq_client.__class__.__name__ + r.service = GRPCFrontend.__name__ r.timestamp.GetCurrentTime() msg.request.CopyFrom(body) return msg @@ -161,4 +161,4 @@ def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): - pass + self.server.stop(None) diff --git a/tests/imgs/test.zip b/tests/imgs/test.zip index d4cb17d8..c284901f 100644 Binary files a/tests/imgs/test.zip and b/tests/imgs/test.zip differ diff --git a/tests/test_image_encoder.py b/tests/test_image_encoder.py index cd17aaec..82917678 100644 --- a/tests/test_image_encoder.py +++ b/tests/test_image_encoder.py @@ -4,13 +4,14 @@ import zipfile from gnes.encoder.image.base import BasePytorchEncoder -from gnes.preprocessor.base import BaseUnaryPreprocessor +from gnes.preprocessor.base import BaseUnaryPreprocessor, PipelinePreprocessor +from gnes.preprocessor.image.resize import ResizeChunkPreprocessor from gnes.preprocessor.image.sliding_window import VanillaSlidingPreprocessor from gnes.proto import gnes_pb2, blob2array def img_process_for_test(dirname): - zipfile_ = zipfile.ZipFile(os.path.join(dirname, 'imgs/test.zip'), "r") + zipfile_ = zipfile.ZipFile(os.path.join(dirname, 'imgs/test.zip')) all_bytes = [zipfile_.open(v).read() for v in zipfile_.namelist()] test_img = [] for raw_bytes in all_bytes: @@ -19,7 +20,10 @@ def img_process_for_test(dirname): test_img.append(d) test_img_all_preprocessor = [] - for preprocessor in [BaseUnaryPreprocessor(doc_type=gnes_pb2.Document.IMAGE), + pipline_prep1 = PipelinePreprocessor() + pipline_prep1.component = lambda: [BaseUnaryPreprocessor(doc_type=gnes_pb2.Document.IMAGE), + ResizeChunkPreprocessor()] + for preprocessor in [pipline_prep1, VanillaSlidingPreprocessor()]: test_img_copy = copy.deepcopy(test_img) for img in test_img_copy: diff --git a/tests/test_image_preprocessor.py b/tests/test_image_preprocessor.py index c224a462..e7286567 100644 --- a/tests/test_image_preprocessor.py +++ b/tests/test_image_preprocessor.py @@ -15,6 +15,7 @@ def setUp(self): self.unary_img_pre_yaml = os.path.join(self.dirname, 'yaml', 'base-unary-image-prep.yml') self.slidingwindow_img_pre_yaml = os.path.join(self.dirname, 'yaml', 'base-vanilla_sldwin-image-prep.yml') self.segmentation_img_pre_yaml = os.path.join(self.dirname, 'yaml', 'base-segmentation-image-prep.yml') + self.resize_img_pre_yaml = os.path.join(self.dirname, 'yaml', 'resize-image-prep.yml') def test_unary_preprocessor_service_empty(self): args = set_preprocessor_service_parser().parse_args([ @@ -116,9 +117,31 @@ def test_unary_preprocessor_service_realdata(self): self.assertEqual(len(d.chunks), 1) self.assertEqual(len(blob2array(d.chunks[0].blob).shape), 3) self.assertEqual(blob2array(d.chunks[0].blob).shape[-1], 3) + + def test_resize_preprocessor_service_realdata(self): + args = set_preprocessor_service_parser().parse_args([ + '--yaml_path', self.resize_img_pre_yaml + ]) + c_args = _set_client_parser().parse_args([ + '--port_in', str(args.port_out), + '--port_out', str(args.port_in) + ]) + all_zips = zipfile.ZipFile(os.path.join(self.dirname, 'imgs/test.zip')) + all_bytes = [all_zips.open(v).read() for v in all_zips.namelist()] + + with PreprocessorService(args), ZmqClient(c_args) as client: + for req in RequestGenerator.index(all_bytes): + msg = gnes_pb2.Message() + msg.request.index.CopyFrom(req.index) + client.send_message(msg) + r = client.recv_message() + self.assertEqual(r.envelope.routes[0].service, 'PreprocessorService:PipelinePreprocessor') + for d in r.request.index.docs: + self.assertEqual(len(d.chunks), 1) + self.assertEqual(len(blob2array(d.chunks[0].blob).shape), 3) + self.assertEqual(blob2array(d.chunks[0].blob).shape[-1], 3) self.assertEqual(blob2array(d.chunks[0].blob).shape[0], 224) self.assertEqual(blob2array(d.chunks[0].blob).shape[1], 224) - print(blob2array(d.chunks[0].blob).dtype) def test_slidingwindow_preprocessor_service_realdata(self): args = set_preprocessor_service_parser().parse_args([ @@ -144,7 +167,6 @@ def test_slidingwindow_preprocessor_service_realdata(self): self.assertEqual(blob2array(d.chunks[0].blob).shape[-1], 3) self.assertEqual(blob2array(d.chunks[0].blob).shape[0], 224) self.assertEqual(blob2array(d.chunks[0].blob).shape[1], 224) - print(blob2array(d.chunks[0].blob).dtype) def test_segmentation_preprocessor_service_realdata(self): args = set_preprocessor_service_parser().parse_args([ @@ -170,4 +192,4 @@ def test_segmentation_preprocessor_service_realdata(self): self.assertEqual(blob2array(d.chunks[0].blob).shape[-1], 3) self.assertEqual(blob2array(d.chunks[0].blob).shape[0], 224) self.assertEqual(blob2array(d.chunks[0].blob).shape[1], 224) - print(blob2array(d.chunks[0].blob).dtype) \ No newline at end of file + print(blob2array(d.chunks[0].blob).dtype) diff --git a/tests/test_parser.py b/tests/test_parser.py new file mode 100644 index 00000000..38c54a30 --- /dev/null +++ b/tests/test_parser.py @@ -0,0 +1,11 @@ +import unittest + +from gnes.cli.parser import set_grpc_frontend_parser + + +class TestParser(unittest.TestCase): + def test_service_parser(self): + args1 = set_grpc_frontend_parser().parse_args([]) + args2 = set_grpc_frontend_parser().parse_args([]) + self.assertNotEqual(args1.port_in, args2.port_in) + self.assertNotEqual(args1.port_out, args2.port_out) diff --git a/tests/test_pipelinepreprocess.py b/tests/test_pipelinepreprocess.py new file mode 100644 index 00000000..b6a11ef3 --- /dev/null +++ b/tests/test_pipelinepreprocess.py @@ -0,0 +1,45 @@ +import os +import unittest + +from gnes.preprocessor.base import BasePreprocessor, PipelinePreprocessor +from gnes.proto import gnes_pb2 + + +class P1(BasePreprocessor): + def apply(self, doc: 'gnes_pb2.Document'): + doc.doc_id += 1 + + +class P2(BasePreprocessor): + def apply(self, doc: 'gnes_pb2.Document'): + doc.doc_id *= 3 + + +class TestPartition(unittest.TestCase): + def setUp(self): + self.dirname = os.path.dirname(__file__) + self.p3_name = 'pipe-p12' + self.yml_dump_path = os.path.join(self.dirname, '%s.yml' % self.p3_name) + self.bin_dump_path = os.path.join(self.dirname, '%s.bin' % self.p3_name) + + def tearDown(self): + if os.path.exists(self.yml_dump_path): + os.remove(self.yml_dump_path) + if os.path.exists(self.bin_dump_path): + os.remove(self.bin_dump_path) + + def test_pipelinepreproces(self): + p3 = PipelinePreprocessor() + p3.component = lambda: [P1(), P2()] + d = gnes_pb2.Document() + d.doc_id = 1 + p3.apply(d) + self.assertEqual(d.doc_id, 6) + + p3.name = self.p3_name + p3.dump_yaml() + p3.dump() + + p4 = BasePreprocessor.load_yaml(p3.yaml_full_path) + p4.apply(d) + self.assertEqual(d.doc_id, 21) diff --git a/tests/test_stream_grpc.py b/tests/test_stream_grpc.py index 1278534c..f500fd20 100644 --- a/tests/test_stream_grpc.py +++ b/tests/test_stream_grpc.py @@ -38,7 +38,7 @@ def setUp(self): self.all_bytes = [b'abc', b'def', b'cde'] * 10 self.all_bytes2 = [b'abc', b'def', b'cde'] - @unittest.mock.patch.dict(os.environ, {'http_proxy': '', 'https_proxy': ''}) + # @unittest.mock.patch.dict(os.environ, {'http_proxy': '', 'https_proxy': ''}) def test_grpc_frontend(self): args = set_grpc_frontend_parser().parse_args([ '--grpc_host', '127.0.0.1', @@ -61,7 +61,7 @@ def test_grpc_frontend(self): self.assertEqual(resp.request_id, str(len(self.all_bytes))) # idx start with 0, but +1 for final FLUSH - @unittest.mock.patch.dict(os.environ, {'http_proxy': '', 'https_proxy': ''}) + # @unittest.mock.patch.dict(os.environ, {'http_proxy': '', 'https_proxy': ''}) def test_async_block(self): args = set_grpc_frontend_parser().parse_args([ '--grpc_host', '127.0.0.1', @@ -81,7 +81,7 @@ def test_async_block(self): '--socket_out', str(SocketType.PUSH_CONNECT), ]) - with Router1(p1_args), Router2(p2_args), GRPCFrontend(args), grpc.insecure_channel( + with GRPCFrontend(args), Router1(p1_args), Router2(p2_args), grpc.insecure_channel( '%s:%s' % (args.grpc_host, args.grpc_port), options=[('grpc.max_send_message_length', 70 * 1024 * 1024), ('grpc.max_receive_message_length', 70 * 1024 * 1024)]) as channel: diff --git a/tests/yaml/resize-image-prep.yml b/tests/yaml/resize-image-prep.yml new file mode 100644 index 00000000..b1015b8d --- /dev/null +++ b/tests/yaml/resize-image-prep.yml @@ -0,0 +1,9 @@ +!PipelinePreprocessor +component: + - !BaseUnaryPreprocessor + parameter: + doc_type: 2 + - !ResizeChunkPreprocessor + parameter: + target_height: 224 + target_width: 224 \ No newline at end of file diff --git a/yaml-example/component/img_preprocessor_singleton.yml b/yaml-example/component/img_preprocessor_unary.yml similarity index 100% rename from yaml-example/component/img_preprocessor_singleton.yml rename to yaml-example/component/img_preprocessor_unary.yml