diff --git a/README.md b/README.md index f125ba77..b7686ef8 100644 --- a/README.md +++ b/README.md @@ -60,8 +60,38 @@ GNES enables large-scale index and semantic search for text-to-text, image-to-im component overview

+

Install GNES

-

Getting Started

+There are two ways to get GNES, either as a docker image or as a PyPI package. +For cloud users, we highly recommend using GNES as a docker image. + +## Run GNES as a Docker Image + +#### via [Docker Cloud](https://cloud.docker.com/u/gnes/repository/list) + +```bash +docker pull gnes/gnes:latest +docker run gnes/gnes:latest --help +``` + +#### via Tencent Container Service + +We also provide a public mirror hosted on Tencent Cloud, from which Chinese mainland users can pull the image faster. + +```bash +docker login --username=xxx ccr.ccs.tencentyun.com # login to Tencent Cloud so that we can pull from it +docker pull ccr.ccs.tencentyun.com/gnes/gnes:latest +docker run ccr.ccs.tencentyun.com/gnes/gnes:latest --help +``` + +> You may pull and run different versions by changing `latest` to a version tag, e.g. `v0.0.24`. + +## Install via `pip` + +Install + + +

Quick Start

As a cloud-native application, GNES requires an **orchestration engine** to coordinate all micro-services. Currently, we support Kubernetes, Docker Swarm and a built-in solution. Click on one of the icons below to get started. diff --git a/gnes/base/__init__.py b/gnes/base/__init__.py index 88e5af39..fe4791bc 100644 --- a/gnes/base/__init__.py +++ b/gnes/base/__init__.py @@ -26,13 +26,14 @@ import ruamel.yaml.constructor -from ..helper import set_logger, profiling, yaml, parse_arg +from ..helper import set_logger, profiling, yaml, parse_arg, load_contrib_module __all__ = ['TrainableBase'] T = TypeVar('T', bound='TrainableBase') + def register_all_class(cls2file_map: Dict, module_name: str): import importlib for k, v in cls2file_map.items(): @@ -40,6 +41,7 @@ def register_all_class(cls2file_map: Dict, module_name: str): getattr(importlib.import_module('gnes.%s.%s' % (module_name, v)), k) except ImportError: pass + load_contrib_module() def import_class_by_str(name: str): @@ -339,3 +341,6 @@ def _dump_instance_to_yaml(data): if p: r['gnes_config'] = p return r + + + diff --git a/gnes/cli/api.py b/gnes/cli/api.py index d609a8a7..804adbe5 100644 --- a/gnes/cli/api.py +++ b/gnes/cli/api.py @@ -35,8 +35,13 @@ def route(args): def compose(args): - from ..composer.base import YamlGraph - YamlGraph(args).build_all() + from ..composer.base import YamlComposer + from ..composer.flask import YamlComposerFlask + + if args.flask: + YamlComposerFlask(args).run() + else: + YamlComposer(args).build_all() def frontend(args): diff --git a/gnes/cli/parser.py b/gnes/cli/parser.py index c90a5f56..b6aea372 100644 --- a/gnes/cli/parser.py +++ b/gnes/cli/parser.py @@ -18,15 +18,18 @@ import argparse -IDX_PORT_DELTA = 2 - def set_base_parser(): from .. 
import __version__ + from termcolor import colored # create the top-level parser parser = argparse.ArgumentParser( - description='GNES v%s: Generic Neural Elastic Search ' - 'is an end-to-end solution for semantic text search' % __version__) + description='%s, a cloud-native semantic search system ' + 'based on deep neural network. ' + 'It enables large-scale index and semantic search for text-to-text, image-to-image, ' + 'video-to-video and any content form. Visit %s for tutorials and documentations.' % ( + colored('GNES v%s: Generic Neural Elastic Search' % __version__, 'green'), + colored('https://gnes.ai', 'cyan', attrs=['underline']))) parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__) parser.add_argument('--verbose', action='store_true', default=False, help='turn on detailed logging for debug') @@ -34,6 +37,8 @@ def set_base_parser(): def set_composer_parser(parser=None): + from pkg_resources import resource_stream + if not parser: parser = set_base_parser() parser.add_argument('--port', @@ -45,7 +50,8 @@ def set_composer_parser(parser=None): default='GNES instance', help='name of the instance') parser.add_argument('--yaml_path', type=argparse.FileType('r'), - required=True, + default=resource_stream( + 'gnes', '/'.join(('resources', 'config', 'compose', 'default.yml'))), help='yaml config of the service') parser.add_argument('--html_path', type=argparse.FileType('w', encoding='utf8'), default='./gnes-board.html', @@ -69,6 +75,19 @@ def set_composer_parser(parser=None): return parser +def set_composer_flask_parser(parser=None): + if not parser: + parser = set_base_parser() + set_composer_parser(parser) + parser.add_argument('--flask', action='store_true', default=False, + help='using Flask to serve GNES composer in interactive mode') + parser.add_argument('--cors', type=str, default='*', + help='setting "Access-Control-Allow-Origin" for HTTP requests') + parser.add_argument('--http_port', type=int, default=8080, + 
help='server port for receiving HTTP requests') + return parser + + def set_service_parser(parser=None): from ..service.base import SocketType, BaseService if not parser: @@ -166,7 +185,7 @@ def set_indexer_service_parser(parser=None): # encoder's port_out is indexer's port_in parser.set_defaults(port_in=parser.get_default('port_out'), - port_out=parser.get_default('port_out') + IDX_PORT_DELTA, + port_out=parser.get_default('port_out') + 2, socket_in=SocketType.PULL_CONNECT, socket_out=SocketType.PUB_BIND) return parser @@ -244,7 +263,9 @@ def set_http_service_parser(parser=None): def get_main_parser(): # create the top-level parser parser = set_base_parser() - sp = parser.add_subparsers(dest='cli') + sp = parser.add_subparsers(dest='cli', title='GNES sub-commands', + description='use "gnes [sub-command] --help" ' + 'to get detailed information about each sub-command') set_grpc_frontend_parser(sp.add_parser('frontend', help='start a grpc frontend service')) set_indexer_service_parser(sp.add_parser('index', help='start an indexer service')) @@ -253,5 +274,5 @@ def get_main_parser(): set_preprocessor_service_parser(sp.add_parser('preprocess', help='start a preprocessor service')) set_http_service_parser(sp.add_parser('client_http', help='start a http service')) set_cli_client_parser(sp.add_parser('client_cli', help='start a grpc client')) - set_composer_parser(sp.add_parser('compose', help='start a GNES composer to simplify config generation')) + set_composer_flask_parser(sp.add_parser('compose', help='start a GNES Board and visualize YAML config')) return parser diff --git a/gnes/client/cli.py b/gnes/client/cli.py index 9f6037cd..3a90b4aa 100644 --- a/gnes/client/cli.py +++ b/gnes/client/cli.py @@ -1,3 +1,19 @@ +# Tencent is pleased to support the open source community by making GNES available. +# +# Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import zipfile import grpc diff --git a/gnes/client/http.py b/gnes/client/http.py index e85efaf4..fc1027cb 100644 --- a/gnes/client/http.py +++ b/gnes/client/http.py @@ -21,7 +21,6 @@ from concurrent.futures import ThreadPoolExecutor import grpc -from aiohttp import web from google.protobuf.json_format import MessageToJson from ..helper import set_logger @@ -34,6 +33,7 @@ def __init__(self, args=None): self.logger = set_logger(self.__class__.__name__, self.args.verbose) def start(self): + from aiohttp import web loop = asyncio.get_event_loop() executor = ThreadPoolExecutor(max_workers=self.args.max_workers) diff --git a/gnes/composer/base.py b/gnes/composer/base.py index cf0e04ad..6070a87f 100644 --- a/gnes/composer/base.py +++ b/gnes/composer/base.py @@ -1,3 +1,18 @@ +# Tencent is pleased to support the open source community by making GNES available. +# +# Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + import copy import random import time @@ -10,13 +25,16 @@ from termcolor import colored from .. import __version__ +from ..cli.parser import set_grpc_frontend_parser, \ + set_router_service_parser, set_loadable_service_parser, set_preprocessor_service_parser, \ + set_indexer_service_parser from ..helper import set_logger from ..service.base import SocketType _yaml = YAML() -class YamlGraph: +class YamlComposer: comp2file = { 'Encoder': 'encode', 'Router': 'route', @@ -25,6 +43,14 @@ class YamlGraph: 'Preprocessor': 'preprocess' } + comp2args = { + 'Encoder': set_loadable_service_parser().parse_args([]), + 'Router': set_router_service_parser().parse_args([]), + 'Indexer': set_indexer_service_parser().parse_args([]), + 'gRPCFrontend': set_grpc_frontend_parser().parse_args([]), + 'Preprocessor': set_preprocessor_service_parser().parse_args([]) + } + class Layer: default_values = { 'name': None, @@ -39,7 +65,7 @@ def __init__(self, layer_id: int = 0): @staticmethod def get_value(comp: Dict, key: str): - return comp.get(key, YamlGraph.Layer.default_values[key]) + return comp.get(key, YamlComposer.Layer.default_values[key]) @property def is_homogenous(self): @@ -71,9 +97,9 @@ def __repr__(self): return str(self.components) def __init__(self, args): + self.args = args + self.logger = set_logger(self.__class__.__name__, self.args.verbose) - self._layers = [] # type: List['YamlGraph.Layer'] - self.logger = set_logger(self.__class__.__name__) with args.yaml_path: tmp = _yaml.load(args.yaml_path) stream = StringIO() @@ -85,7 +111,7 @@ def __init__(self, args): self._networks = tmp.get('networks', {}) self._volumes = tmp.get('volumes', {}) - self.args = args + self._layers = [] # type: List['YamlComposer.Layer'] self._num_layer = 0 if 'services' in tmp: @@ -125,8 +151,8 @@ def add_layer(self, layer: 'Layer' = None) -> None: def add_comp(self, comp: Dict) -> None: 
self._layers[-1].append(comp) - def build_layers(self) -> List['YamlGraph.Layer']: - all_layers = [] # type: List['YamlGraph.Layer'] + def build_layers(self) -> List['YamlComposer.Layer']: + all_layers = [] # type: List['YamlComposer.Layer'] for idx, layer in enumerate(self._layers[1:] + [self._layers[0]], 1): last_layer = self._layers[idx - 1] for l in self._add_router(last_layer, layer): @@ -138,17 +164,16 @@ def build_layers(self) -> List['YamlGraph.Layer']: return all_layers @staticmethod - def build_dockerswarm(all_layers: List['YamlGraph.Layer'], docker_img: str = 'gnes/gnes:latest', + def build_dockerswarm(all_layers: List['YamlComposer.Layer'], docker_img: str = 'gnes/gnes:latest', volumes: Dict = None, networks: Dict = None) -> str: with resource_stream('gnes', '/'.join(('resources', 'compose', 'gnes-swarm.yml'))) as r: swarm_lines = _yaml.load(r) - taboo = {'name', 'replicas', 'yaml_path'} config_dict = {} for l_idx, layer in enumerate(all_layers): for c_idx, c in enumerate(layer.components): c_name = '%s%d%d' % (c['name'], l_idx, c_idx) args = ['--%s %s' % (a, str(v) if ' ' not in str(v) else ('"%s"' % str(v))) for a, v in c.items() if - a not in taboo and v] + a in YamlComposer.comp2args[c['name']] and v] if 'yaml_path' in c and c['yaml_path'] is not None: args.append('--yaml_path /%s_yaml' % c_name) config_dict['%s_yaml' % c_name] = {'file': c['yaml_path']} @@ -181,16 +206,16 @@ def build_dockerswarm(all_layers: List['YamlGraph.Layer'], docker_img: str = 'gn args += ['--host_in %s' % host_in_name] # '--host_out %s' % host_out_name] - cmd = '%s %s' % (YamlGraph.comp2file[c['name']], ' '.join(args)) + cmd = '%s %s' % (YamlComposer.comp2file[c['name']], ' '.join(args)) swarm_lines['services'][c_name] = CommentedMap({ 'image': docker_img, 'command': cmd, }) - rep_c = YamlGraph.Layer.get_value(c, 'replicas') + rep_c = YamlComposer.Layer.get_value(c, 'replicas') if rep_c > 1: swarm_lines['services'][c_name]['deploy'] = CommentedMap({ - 'replicas': 
YamlGraph.Layer.get_value(c, 'replicas'), + 'replicas': YamlComposer.Layer.get_value(c, 'replicas'), 'restart_policy': { 'condition': 'on-failure', 'max_attempts': 3, @@ -213,23 +238,22 @@ def build_dockerswarm(all_layers: List['YamlGraph.Layer'], docker_img: str = 'gn return stream.getvalue() @staticmethod - def build_kubernetes(all_layers: List['YamlGraph.Layer'], *args, **kwargs): + def build_kubernetes(all_layers: List['YamlComposer.Layer'], *args, **kwargs): pass @staticmethod - def build_shell(all_layers: List['YamlGraph.Layer'], log_redirect: str = None) -> str: + def build_shell(all_layers: List['YamlComposer.Layer'], log_redirect: str = None) -> str: shell_lines = [] - taboo = {'name', 'replicas'} for layer in all_layers: for c in layer.components: - rep_c = YamlGraph.Layer.get_value(c, 'replicas') + rep_c = YamlComposer.Layer.get_value(c, 'replicas') shell_lines.append('printf "starting service %s with %s replicas...\\n"' % ( colored(c['name'], 'green'), colored(rep_c, 'yellow'))) for _ in range(rep_c): - cmd = YamlGraph.comp2file[c['name']] + cmd = YamlComposer.comp2file[c['name']] args = ' '.join( ['--%s %s' % (a, str(v) if ' ' not in str(v) else ('"%s"' % str(v))) for a, v in c.items() if - a not in taboo and v]) + a in YamlComposer.comp2args[c['name']] and v]) shell_lines.append('gnes %s %s %s &' % ( cmd, args, '>> %s 2>&1' % log_redirect if log_redirect else '')) @@ -237,7 +261,7 @@ def build_shell(all_layers: List['YamlGraph.Layer'], log_redirect: str = None) - return r.read().decode().replace('{{gnes-template}}', '\n'.join(shell_lines)) @staticmethod - def build_mermaid(all_layers: List['YamlGraph.Layer'], mermaid_leftright: bool = False) -> str: + def build_mermaid(all_layers: List['YamlComposer.Layer'], mermaid_leftright: bool = False) -> str: mermaid_graph = [] cls_dict = defaultdict(set) for l_idx, layer in enumerate(all_layers[1:] + [all_layers[0]], 1): @@ -246,20 +270,20 @@ def build_mermaid(all_layers: List['YamlGraph.Layer'], 
mermaid_leftright: bool = for c_idx, c in enumerate(last_layer.components): # if len(last_layer.components) > 1: # self.mermaid_graph.append('\tsubgraph %s%d' % (c['name'], c_idx)) - for j in range(YamlGraph.Layer.get_value(c, 'replicas')): + for j in range(YamlComposer.Layer.get_value(c, 'replicas')): for c1_idx, c1 in enumerate(layer.components): if c1['port_in'] == c['port_out']: p = '((%s%s))' if c['name'] == 'Router' else '(%s%s)' p1 = '((%s%s))' if c1['name'] == 'Router' else '(%s%s)' - for j1 in range(YamlGraph.Layer.get_value(c1, 'replicas')): + for j1 in range(YamlComposer.Layer.get_value(c1, 'replicas')): _id, _id1 = '%s%s%s' % (last_layer.layer_id, c_idx, j), '%s%s%s' % ( layer.layer_id, c1_idx, j1) conn_type = ( c['socket_out'].split('_')[0] + '/' + c1['socket_in'].split('_')[0]).lower() s_id = '%s%s' % (c_idx if len(last_layer.components) > 1 else '', - j if YamlGraph.Layer.get_value(c, 'replicas') > 1 else '') + j if YamlComposer.Layer.get_value(c, 'replicas') > 1 else '') s1_id = '%s%s' % (c1_idx if len(layer.components) > 1 else '', - j1 if YamlGraph.Layer.get_value(c1, 'replicas') > 1 else '') + j1 if YamlComposer.Layer.get_value(c1, 'replicas') > 1 else '') mermaid_graph.append( '\t%s%s%s-- %s -->%s%s%s' % ( c['name'], _id, p % (c['name'], s_id), conn_type, c1['name'], _id1, @@ -310,11 +334,15 @@ def std_or_print(f, content): 'timestamp': time.strftime("%a, %d %b %Y %H:%M:%S"), 'version': __version__ } + + cmds['html'] = self.build_html(cmds) + std_or_print(self.args.graph_path, cmds['mermaid']) std_or_print(self.args.shell_path, cmds['shell']) std_or_print(self.args.swarm_path, cmds['docker']) std_or_print(self.args.k8s_path, cmds['k8s']) - std_or_print(self.args.html_path, self.build_html(cmds)) + std_or_print(self.args.html_path, cmds['html']) + return cmds @staticmethod def _get_random_port(min_port: int = 49152, max_port: int = 65536) -> str: @@ -324,7 +352,7 @@ def _get_random_port(min_port: int = 49152, max_port: int = 65536) -> str: def 
_get_random_host(comp_name: str) -> str: return str(comp_name + str(random.randrange(0, 100))) - def _add_router(self, last_layer: 'YamlGraph.Layer', layer: 'YamlGraph.Layer') -> List['YamlGraph.Layer']: + def _add_router(self, last_layer: 'YamlComposer.Layer', layer: 'YamlComposer.Layer') -> List['YamlComposer.Layer']: def rule1(): # a shortcut fn: push connect the last and current last_layer.components[0]['socket_out'] = str(SocketType.PUSH_BIND) @@ -337,7 +365,7 @@ def rule2(): def rule3(): # a shortcut fn: (N)-2-(N) with push pull connection - router_layer = YamlGraph.Layer(layer_id=self._num_layer) + router_layer = YamlComposer.Layer(layer_id=self._num_layer) self._num_layer += 1 last_layer.components[0]['socket_out'] = str(SocketType.PUSH_CONNECT) r = CommentedMap({'name': 'Router', @@ -366,7 +394,7 @@ def rule5(): def rule6(): last_layer.components[0]['socket_out'] = str(SocketType.PUB_BIND) - router_layer = YamlGraph.Layer(layer_id=self._num_layer) + router_layer = YamlComposer.Layer(layer_id=self._num_layer) self._num_layer += 1 for c in layer.components: income = self.Layer.get_value(c, 'income') @@ -385,7 +413,7 @@ def rule6(): def rule7(): last_layer.components[0]['socket_out'] = str(SocketType.PUSH_CONNECT) - router_layer = YamlGraph.Layer(layer_id=self._num_layer) + router_layer = YamlComposer.Layer(layer_id=self._num_layer) self._num_layer += 1 r0 = CommentedMap({'name': 'Router', 'yaml_path': None, @@ -397,7 +425,7 @@ def rule7(): router_layers.append(router_layer) last_layer.components[0]['port_out'] = r0['port_in'] - router_layer = YamlGraph.Layer(layer_id=self._num_layer) + router_layer = YamlComposer.Layer(layer_id=self._num_layer) self._num_layer += 1 for c in layer.components: r = CommentedMap({'name': 'Router', @@ -414,7 +442,7 @@ def rule7(): def rule10(): last_layer.components[0]['socket_out'] = str(SocketType.PUSH_CONNECT) - router_layer = YamlGraph.Layer(layer_id=self._num_layer) + router_layer = 
YamlComposer.Layer(layer_id=self._num_layer) self._num_layer += 1 r0 = CommentedMap({'name': 'Router', 'yaml_path': None, @@ -432,7 +460,7 @@ def rule10(): def rule8(): last_layer.components[0]['socket_out'] = str(SocketType.PUSH_CONNECT) - router_layer = YamlGraph.Layer(layer_id=self._num_layer) + router_layer = YamlComposer.Layer(layer_id=self._num_layer) self._num_layer += 1 r = CommentedMap({'name': 'Router', 'yaml_path': None, @@ -466,7 +494,7 @@ def rule8(): else: self._num_layer -= 1 - router_layer = YamlGraph.Layer(layer_id=self._num_layer) + router_layer = YamlComposer.Layer(layer_id=self._num_layer) self._num_layer += 1 router_layer.append(r) router_layers.append(router_layer) diff --git a/gnes/composer/flask.py b/gnes/composer/flask.py new file mode 100644 index 00000000..36003967 --- /dev/null +++ b/gnes/composer/flask.py @@ -0,0 +1,67 @@ +# Tencent is pleased to support the open source community by making GNES available. +# +# Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import tempfile + +from .base import YamlComposer +from ..cli.parser import set_composer_parser +from ..helper import set_logger + + +class YamlComposerFlask: + def __init__(self, args): + self.args = args + self.logger = set_logger(self.__class__.__name__, self.args.verbose) + + def _create_flask_app(self): + try: + from flask import Flask, request + from flask_compress import Compress + from flask_cors import CORS + except ImportError: + raise ImportError('Flask or its dependencies are not fully installed, ' + 'they are required for serving HTTP requests.' + 'Please use "pip install gnes[http]" to install it.') + + # support up to 10 concurrent HTTP requests + app = Flask(__name__) + + @app.route('/', methods=['GET']) + def _get_homepage(): + return YamlComposer(set_composer_parser().parse_args([])).build_all()['html'] + + @app.route('/generate', methods=['POST']) + def _regenerate(): + data = request.form if request.form else request.json + if not data or 'yaml-config' not in data: + return '

Bad POST request

your POST request does not contain "yaml-config" field!', 406 + f = tempfile.NamedTemporaryFile('w', delete=False).name + with open(f, 'w', encoding='utf8') as fp: + fp.write(data['yaml-config']) + try: + return YamlComposer(set_composer_parser().parse_args([ + '--yaml_path', f + ])).build_all()['html'] + except Exception as e: + self.logger.error(e) + return '

Bad YAML input

please kindly check the format, indent and content of your YAML file!', 400 + + CORS(app, origins=self.args.cors) + Compress().init_app(app) + return app + + def run(self): + app = self._create_flask_app() + app.run(port=self.args.http_port, threaded=True, host='0.0.0.0') diff --git a/gnes/encoder/base.py b/gnes/encoder/base.py index 05deae3b..bf3c1f77 100644 --- a/gnes/encoder/base.py +++ b/gnes/encoder/base.py @@ -21,7 +21,6 @@ import numpy as np from ..base import TrainableBase -from ..proto import gnes_pb2 class BaseEncoder(TrainableBase): diff --git a/gnes/encoder/image/base.py b/gnes/encoder/image/base.py index 90691dbd..6a92c108 100644 --- a/gnes/encoder/image/base.py +++ b/gnes/encoder/image/base.py @@ -53,12 +53,13 @@ def fn_parser(self, layer: str) -> Callable: if '(' not in layer and ')' not in layer: # this is a shorthand syntax we need to add "(x)" at the end - layer = 'm.%s(x)'%layer + layer = 'm.%s(x)' % layer else: pass def layer_fn(x, l, m, torch): return eval(l) + return lambda x: layer_fn(x, layer, self.m, torch) def forward(self, x): @@ -93,4 +94,3 @@ def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray: result_npy.append(encodes.data.cpu().numpy()) return np.array(result_npy, dtype=np.float32) - diff --git a/gnes/encoder/image/cvae.py b/gnes/encoder/image/cvae.py index e3b0e711..f297e17d 100644 --- a/gnes/encoder/image/cvae.py +++ b/gnes/encoder/image/cvae.py @@ -14,11 +14,13 @@ # limitations under the License. 
from typing import List + import numpy as np -from gnes.helper import batch_iterator -from ..base import BaseImageEncoder from PIL import Image +from ..base import BaseImageEncoder +from ...helper import batch_iterator + class CVAEEncoder(BaseImageEncoder): @@ -56,7 +58,7 @@ def post_init(self): def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray: ret = [] img = [(np.array(Image.fromarray(im).resize((120, 120)), - dtype=np.float32)/255) for im in img] + dtype=np.float32) / 255) for im in img] for _im in batch_iterator(img, self.batch_size): _mean, _var = self.sess.run((self.mean, self.var), feed_dict={self.inputs: _im}) diff --git a/gnes/encoder/image/cvae_cores/model.py b/gnes/encoder/image/cvae_cores/model.py index d928473d..9a348fac 100644 --- a/gnes/encoder/image/cvae_cores/model.py +++ b/gnes/encoder/image/cvae_cores/model.py @@ -13,8 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import tensorflow as tf import numpy as np +import tensorflow as tf class CVAE(tf.keras.Model): @@ -23,53 +23,53 @@ def __init__(self, latent_dim): self.latent_dim = latent_dim self.inference_net = tf.keras.Sequential( [ - tf.keras.layers.InputLayer(input_shape=(120, 120, 3)), - tf.keras.layers.Conv2D( - filters=32, kernel_size=3, strides=(2, 2), - padding='SAME', - activation='relu'), - tf.keras.layers.Conv2D( - filters=32, kernel_size=3, strides=(2, 2), - padding='SAME', - activation='relu'), - tf.keras.layers.Conv2D( - filters=32, kernel_size=3, strides=(2, 2), - padding='SAME', - activation='relu'), - tf.keras.layers.Flatten(), - # No activation - tf.keras.layers.Dense(latent_dim + latent_dim), + tf.keras.layers.InputLayer(input_shape=(120, 120, 3)), + tf.keras.layers.Conv2D( + filters=32, kernel_size=3, strides=(2, 2), + padding='SAME', + activation='relu'), + tf.keras.layers.Conv2D( + filters=32, kernel_size=3, strides=(2, 2), + padding='SAME', + activation='relu'), + tf.keras.layers.Conv2D( 
+ filters=32, kernel_size=3, strides=(2, 2), + padding='SAME', + activation='relu'), + tf.keras.layers.Flatten(), + # No activation + tf.keras.layers.Dense(latent_dim + latent_dim), ] ) self.generative_net = tf.keras.Sequential( - [ - tf.keras.layers.InputLayer(input_shape=(latent_dim,)), - tf.keras.layers.Dense(units=15*15*32, - activation=tf.nn.relu), - tf.keras.layers.Reshape(target_shape=(15, 15, 32)), - tf.keras.layers.Conv2DTranspose( - filters=32, - kernel_size=3, - strides=(2, 2), - padding="SAME", - activation='relu'), - tf.keras.layers.Conv2DTranspose( - filters=32, - kernel_size=3, - strides=(2, 2), - padding="SAME", - activation='relu'), - tf.keras.layers.Conv2DTranspose( - filters=32, - kernel_size=3, - strides=(2, 2), - padding="SAME", - activation='relu'), - # No activation - tf.keras.layers.Conv2DTranspose( - filters=3, kernel_size=3, strides=(1, 1), padding="SAME"), - ] + [ + tf.keras.layers.InputLayer(input_shape=(latent_dim,)), + tf.keras.layers.Dense(units=15 * 15 * 32, + activation=tf.nn.relu), + tf.keras.layers.Reshape(target_shape=(15, 15, 32)), + tf.keras.layers.Conv2DTranspose( + filters=32, + kernel_size=3, + strides=(2, 2), + padding="SAME", + activation='relu'), + tf.keras.layers.Conv2DTranspose( + filters=32, + kernel_size=3, + strides=(2, 2), + padding="SAME", + activation='relu'), + tf.keras.layers.Conv2DTranspose( + filters=32, + kernel_size=3, + strides=(2, 2), + padding="SAME", + activation='relu'), + # No activation + tf.keras.layers.Conv2DTranspose( + filters=3, kernel_size=3, strides=(1, 1), padding="SAME"), + ] ) def sample(self, eps=None): diff --git a/gnes/encoder/image/inception.py b/gnes/encoder/image/inception.py index af143201..5c962146 100644 --- a/gnes/encoder/image/inception.py +++ b/gnes/encoder/image/inception.py @@ -14,10 +14,12 @@ # limitations under the License. 
from typing import List + import numpy as np +from PIL import Image + from ..base import BaseImageEncoder from ...helper import batching, batch_iterator -from PIL import Image class TFInceptionEncoder(BaseImageEncoder): @@ -63,7 +65,8 @@ def post_init(self): def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray: ret = [] img = [(np.array(Image.fromarray(im).resize((self.inception_size_x, - self.inception_size_y)), dtype=np.float32) * 2 / 255. - 1.) for im in img] + self.inception_size_y)), dtype=np.float32) * 2 / 255. - 1.) for im + in img] for _im in batch_iterator(img, self.batch_size): _, end_points_ = self.sess.run((self.logits, self.end_points), feed_dict={self.inputs: _im}) diff --git a/gnes/encoder/numeric/tf_pq.py b/gnes/encoder/numeric/tf_pq.py index eac20a43..1d051f11 100644 --- a/gnes/encoder/numeric/tf_pq.py +++ b/gnes/encoder/numeric/tf_pq.py @@ -70,6 +70,5 @@ def encode(self, vecs: np.ndarray, *args, **kwargs) -> np.ndarray: self._graph['ph_centroids']: self.centroids}) return tmp.astype(np.uint8) - def close(self): self._sess.close() diff --git a/gnes/encoder/text/elmo.py b/gnes/encoder/text/elmo.py index 10bfcbf9..64352db3 100644 --- a/gnes/encoder/text/elmo.py +++ b/gnes/encoder/text/elmo.py @@ -66,4 +66,3 @@ def encode(self, text: List[str], *args, **kwargs) -> np.ndarray: _pooled = pooling_np(_layer_data, self.pooling_strategy) pooled_data.append(_pooled) return np.array(pooled_data, dtype=np.float32) - diff --git a/gnes/encoder/text/gpt.py b/gnes/encoder/text/gpt.py index e5f64c1c..b7549aca 100644 --- a/gnes/encoder/text/gpt.py +++ b/gnes/encoder/text/gpt.py @@ -109,7 +109,6 @@ def encode(self, text: List[str], *args, **kwargs) -> np.ndarray: return output_tensor.numpy() - class GPT2Encoder(GPTEncoder): def _get_token_ids(self, x): diff --git a/gnes/helper.py b/gnes/helper.py index 82a7f4ed..a14e435c 100644 --- a/gnes/helper.py +++ b/gnes/helper.py @@ -40,7 +40,7 @@ __all__ = ['get_sys_info', 'get_optimal_sample_size', 
'get_perm', 'time_profile', 'set_logger', 'batch_iterator', 'batching', 'yaml', - 'profile_logger', 'doc_logger', + 'profile_logger', 'load_contrib_module', 'parse_arg', 'profiling', 'FileLock', 'train_required', 'get_first_available_gpu'] @@ -506,8 +506,33 @@ def arg_wrapper(self, *args, **kwargs): return arg_wrapper +def load_contrib_module(): + if not os.getenv('GNES_CONTRIB_MODULE_IS_LOADING'): + import importlib.util + + contrib = os.getenv('GNES_CONTRIB_MODULE') + os.environ['GNES_CONTRIB_MODULE_IS_LOADING'] = 'true' + + modules = [] + + if contrib: + default_logger.info( + 'find value in $GNES_CONTRIB_MODULE=%s, will try to load these modules from external' % contrib) + for c in contrib.split(','): + if ':' in c: + _name, _path = c.split(':') + spec = importlib.util.spec_from_file_location('gnes.contrib', _path) + foo = importlib.util.module_from_spec(spec) + spec.loader.exec_module(foo) + m = getattr(foo, _name) + modules.append(m) + default_logger.info('successfully register %s class, you can now use it via yaml.' % m) + return modules + + profile_logger = set_logger('PROFILE') -doc_logger = set_logger('DOC') +default_logger = set_logger('GNES') profiling = time_profile yaml = _get_yaml() + diff --git a/gnes/indexer/key_only.py b/gnes/indexer/key_only.py index 4fd34191..9be22082 100644 --- a/gnes/indexer/key_only.py +++ b/gnes/indexer/key_only.py @@ -1,3 +1,19 @@ +# Tencent is pleased to support the open source community by making GNES available. +# +# Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + from typing import List, Tuple import numpy as np diff --git a/gnes/indexer/vector/annoy.py b/gnes/indexer/vector/annoy.py index 2c3a9913..cacf4c64 100644 --- a/gnes/indexer/vector/annoy.py +++ b/gnes/indexer/vector/annoy.py @@ -1,3 +1,18 @@ +# Tencent is pleased to support the open source community by making GNES available. +# +# Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import os from typing import List, Tuple diff --git a/gnes/preprocessor/helper.py b/gnes/preprocessor/helper.py index aa0a747d..31117eae 100644 --- a/gnes/preprocessor/helper.py +++ b/gnes/preprocessor/helper.py @@ -20,9 +20,9 @@ from typing import List, Callable import cv2 +import imagehash import numpy as np from PIL import Image -import imagehash def get_video_frames(buffer_data: bytes, image_format: str = "cv2", @@ -73,7 +73,7 @@ def get_video_frames(buffer_data: bytes, image_format: str = "cv2", def block_descriptor(image: "np.ndarray", descriptor_fn: Callable, num_blocks: int = 3) -> "np.ndarray": - h, w, _ = image.shape # find shape of image and channel + h, w, _ = image.shape # find shape of image and channel block_h = int(np.ceil(h / num_blocks)) block_w = int(np.ceil(w / num_blocks)) @@ -91,7 +91,7 @@ def pyramid_descriptor(image: "np.ndarray", max_level: int = 2) -> "np.ndarray": descriptors = [] for level in range(max_level + 1): - num_blocks = 2**level + num_blocks = 2 ** level descriptors.extend(block_descriptor(image, descriptor_fn, num_blocks)) return np.array(descriptors) diff --git a/gnes/preprocessor/image/base.py b/gnes/preprocessor/image/base.py index f92dfb0a..e2e72d56 100644 --- a/gnes/preprocessor/image/base.py +++ b/gnes/preprocessor/image/base.py @@ -13,9 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import List + from ..base import BasePreprocessor from ...proto import gnes_pb2 -from typing import List class BaseImagePreprocessor(BasePreprocessor): @@ -35,4 +36,4 @@ def _get_all_chunks_weight(self, image_set: List['np.ndarray']) -> List[float]: def _torch_transform(cls, image): import torchvision.transforms as transforms return transforms.Compose([transforms.ToTensor(), - transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])(image) \ No newline at end of file + transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])(image) diff --git a/gnes/preprocessor/image/segmentation.py b/gnes/preprocessor/image/segmentation.py index 0dadbc46..dbc99170 100644 --- a/gnes/preprocessor/image/segmentation.py +++ b/gnes/preprocessor/image/segmentation.py @@ -1,10 +1,27 @@ -from .base import BaseImagePreprocessor -from ...proto import array2blob -from PIL import Image -import numpy as np +# Tencent is pleased to support the open source community by making GNES available. +# +# Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import io import os +import numpy as np +from PIL import Image + +from .base import BaseImagePreprocessor +from ...proto import array2blob + class SegmentPreprocessor(BaseImagePreprocessor): diff --git a/gnes/preprocessor/image/sliding_window.py b/gnes/preprocessor/image/sliding_window.py index 4cccdd76..3a32d447 100644 --- a/gnes/preprocessor/image/sliding_window.py +++ b/gnes/preprocessor/image/sliding_window.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import io from typing import List @@ -81,7 +82,8 @@ def _get_all_sliding_window(self, img: 'np.ndarray') -> List['np.ndarray']: writeable=False ) expanded_input = expanded_input.reshape((-1, self.window_size, self.window_size, 3)) - return [np.array(Image.fromarray(img).resize((self.target_img_size, self.target_img_size))) for img in expanded_input] + return [np.array(Image.fromarray(img).resize((self.target_img_size, self.target_img_size))) for img in + expanded_input] class VanillaSlidingPreprocessor(BaseSlidingPreprocessor): @@ -94,4 +96,3 @@ class WeightedSlidingPreprocessor(BaseSlidingPreprocessor): def _get_all_chunks_weight(self, image_set: List['np.ndarray']) -> List[float]: return FFmpegPreprocessor.pic_weight(image_set) - diff --git a/gnes/preprocessor/video/ffmpeg.py b/gnes/preprocessor/video/ffmpeg.py index 33d6c488..80a1ab5a 100644 --- a/gnes/preprocessor/video/ffmpeg.py +++ b/gnes/preprocessor/video/ffmpeg.py @@ -14,11 +14,12 @@ # limitations under the License. 
from typing import List + import numpy as np from .base import BaseVideoPreprocessor -from ...proto import gnes_pb2, array2blob from ..helper import get_video_frames, phash_descriptor +from ...proto import gnes_pb2, array2blob class FFmpegPreprocessor(BaseVideoPreprocessor): diff --git a/gnes/preprocessor/video/shotdetect.py b/gnes/preprocessor/video/shotdetect.py index 29a86f58..377d8ee5 100644 --- a/gnes/preprocessor/video/shotdetect.py +++ b/gnes/preprocessor/video/shotdetect.py @@ -16,9 +16,10 @@ # pylint: disable=low-comment-ratio import numpy as np + from .base import BaseVideoPreprocessor -from ...proto import gnes_pb2, array2blob from ..helper import get_video_frames, compute_descriptor, compare_descriptor +from ...proto import gnes_pb2, array2blob class ShotDetectPreprocessor(BaseVideoPreprocessor): @@ -66,7 +67,7 @@ def apply(self, doc: 'gnes_pb2.Document') -> None: clt = KMeans(n_clusters=2) clt.fit(dists) - #select which cluster includes shot frames + # select which cluster includes shot frames big_center = np.argmax(clt.cluster_centers_) shots = [] diff --git a/gnes/resources/compose/gnes-board.html b/gnes/resources/compose/gnes-board.html index 81fc94e0..822048c7 100644 --- a/gnes/resources/compose/gnes-board.html +++ b/gnes/resources/compose/gnes-board.html @@ -123,10 +123,25 @@ } GNES Board + + + + + + + + + + + + + + +