From a4b883acb312b5f47d34955d3ec2dccb4cd782c6 Mon Sep 17 00:00:00 2001 From: Larry Yan Date: Tue, 23 Jul 2019 16:00:12 +0800 Subject: [PATCH 01/12] fix(indexer): add drop raw bytes option to leveldb --- gnes/indexer/fulltext/leveldb.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/gnes/indexer/fulltext/leveldb.py b/gnes/indexer/fulltext/leveldb.py index 7d15d42b..2a4bf3fa 100644 --- a/gnes/indexer/fulltext/leveldb.py +++ b/gnes/indexer/fulltext/leveldb.py @@ -26,10 +26,16 @@ class LVDBIndexer(BaseTextIndexer): - def __init__(self, data_path: str, keep_na_doc: bool = True, *args, **kwargs): + def __init__(self, data_path: str, + keep_na_doc: bool = True, + drop_raw_bytes: bool = False, + drop_chunk_blob: bool = False, + *args, **kwargs): super().__init__(*args, **kwargs) self.data_path = data_path self.keep_na_doc = keep_na_doc + self.drop_raw_bytes = drop_raw_bytes + self.drop_chunk_blob = drop_chunk_blob self._NOT_FOUND = None def post_init(self): @@ -40,6 +46,11 @@ def add(self, keys: List[int], docs: List['gnes_pb2.Document'], *args, **kwargs) with self._db.write_batch() as wb: for k, d in zip(keys, docs): doc_id = pickle.dumps(k) + if self.drop_raw_bytes: + d.raw_bytes = b'' + if self.drop_chunk_blob: + for i in range(len(d.chunks)): + d.chunks[i].ClearField('blob') doc = d.SerializeToString() wb.put(doc_id, doc) From ab6c88ccfe54ba5f96f09510e97b9658c553c1a9 Mon Sep 17 00:00:00 2001 From: Larry Yan Date: Tue, 23 Jul 2019 16:47:33 +0800 Subject: [PATCH 02/12] fix(encoder): fix error in cvae encoder --- gnes/encoder/__init__.py | 1 + gnes/encoder/image/cvae.py | 2 +- gnes/encoder/image/cvae_cores/__init__.py | 0 3 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 gnes/encoder/image/cvae_cores/__init__.py diff --git a/gnes/encoder/__init__.py b/gnes/encoder/__init__.py index 1e6ffa0a..e77f2411 100644 --- a/gnes/encoder/__init__.py +++ b/gnes/encoder/__init__.py @@ -40,6 +40,7 @@ 'HashEncoder': 'numeric.hash', 'BasePytorchEncoder': 'image.base', 'TFInceptionEncoder': 'image.inception', + 'CVAEEncoder': 'image.cvae' } register_all_class(_cls2file_map, 'encoder') diff --git a/gnes/encoder/image/cvae.py b/gnes/encoder/image/cvae.py index f297e17d..5489f85a 100644 --- a/gnes/encoder/image/cvae.py +++ b/gnes/encoder/image/cvae.py @@ -40,7 +40,7 @@ def __init__(self, model_dir: str, def post_init(self): import tensorflow as tf - from .cave_cores.model import CVAE + from .cvae_cores.model import CVAE self._model = CVAE(self.latent_dim) self.inputs = tf.placeholder(tf.float32, diff --git a/gnes/encoder/image/cvae_cores/__init__.py b/gnes/encoder/image/cvae_cores/__init__.py new file mode 100644 index 00000000..e69de29b From eb487799b3e4b602738765d9ad5edea997147930 Mon Sep 17 00:00:00 2001 From: Larry Yan Date: Tue, 23 Jul 2019 17:04:29 +0800 Subject: [PATCH 03/12] fix(encoder): fix tf scope error in cvae encoder --- gnes/encoder/image/cvae.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/gnes/encoder/image/cvae.py b/gnes/encoder/image/cvae.py index 5489f85a..dd30b119 100644 --- a/gnes/encoder/image/cvae.py +++ b/gnes/encoder/image/cvae.py @@ -41,19 +41,20 @@ def __init__(self, model_dir: str, def post_init(self): import tensorflow as tf from .cvae_cores.model import CVAE + g = tf.Graph() + with g.as_default(): + self._model = CVAE(self.latent_dim) + self.inputs = tf.placeholder(tf.float32, + (None, 120, 120, 3)) - self._model = CVAE(self.latent_dim) - self.inputs = tf.placeholder(tf.float32, - (None, 
120, 120, 3)) + self.mean, self.var = self._model.encode(self.inputs) - self.mean, self.var = self._model.encode(self.inputs) - - config = tf.ConfigProto(log_device_placement=False) - if self.use_gpu: - config.gpu_options.allow_growth = True - self.sess = tf.Session(config=config) - self.saver = tf.train.Saver() - self.saver.restore(self.sess, self.model_dir) + config = tf.ConfigProto(log_device_placement=False) + if self.use_gpu: + config.gpu_options.allow_growth = True + self.sess = tf.Session(config=config) + self.saver = tf.train.Saver() + self.saver.restore(self.sess, self.model_dir) def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray: ret = [] From 649ed1314b9c12167a958d6f8e259944ebdf96e3 Mon Sep 17 00:00:00 2001 From: Larry Yan Date: Tue, 23 Jul 2019 17:25:04 +0800 Subject: [PATCH 04/12] fix(encoder): add normalize option in cvae encoder --- gnes/encoder/image/cvae.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/gnes/encoder/image/cvae.py b/gnes/encoder/image/cvae.py index dd30b119..84c3f9dd 100644 --- a/gnes/encoder/image/cvae.py +++ b/gnes/encoder/image/cvae.py @@ -28,6 +28,7 @@ def __init__(self, model_dir: str, latent_dim: int = 300, batch_size: int = 64, select_method: str = 'MEAN', + l2_normalize: bool = False, use_gpu: bool = True, *args, **kwargs): super().__init__(*args, **kwargs) @@ -36,6 +37,7 @@ def __init__(self, model_dir: str, self.latent_dim = latent_dim self.batch_size = batch_size self.select_method = select_method + self.l2_normalize = l2_normalize self.use_gpu = use_gpu def post_init(self): @@ -69,4 +71,7 @@ def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray: ret.append(_var) elif self.select_method == 'MEAN_VAR': ret.append(np.concatenate([_mean, _var]), axis=1) - return np.concatenate(ret, axis=0).astype(np.float32) + v = np.concatenate(ret, axis=0).astype(np.float32) + if self.l2_normalize: + v = v / (v**2).sum(axis=1, keepdims=True)**0.5 + return v From de7560c09edff1bde6cd8766a6fb3b619abce5a5 Mon Sep 17 00:00:00 2001 From: Jem Date: Tue, 23 Jul 2019 23:39:28 +0800 Subject: [PATCH 05/12] feat(image preprocessor): calculate offsetnd for each chunk --- gnes-board.html | 464 ++++++++++++++++++++ gnes/preprocessor/image/base.py | 13 + gnes/preprocessor/image/segmentation.py | 25 +- gnes/preprocessor/image/sliding_window.py | 38 +- tests/yaml/base-segmentation-image-prep.yml | 1 + 5 files changed, 536 insertions(+), 5 deletions(-) create mode 100644 gnes-board.html diff --git a/gnes-board.html b/gnes-board.html new file mode 100644 index 00000000..ed3171d0 --- /dev/null +++ b/gnes-board.html @@ -0,0 +1,464 @@ + + + + + + + + + GNES Board + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ YAML config +
+
+
+
+
+ + +
+
+
+ + +
+ +
+ +
+
+
+
+
+
+
+ +
+
+ +

This is the workflow generated from your input YAML config, which helps you + to understand how microservices work together in GNES.

+
+
+
+ Workflow +
+
+
+ graph TD + gRPCFrontend000(gRPCFrontend)-- push/pull -->Preprocessor100(Preprocessor) + Preprocessor100(Preprocessor)-- push/pull -->Encoder200(Encoder) + Encoder200(Encoder)-- push/pull -->Indexer300(Indexer) + Indexer300(Indexer)-- push/pull -->gRPCFrontend000(gRPCFrontend) +classDef FrontendCLS fill:#ffb347,stroke:#277CE8,stroke-width:1px,stroke-dasharray:5; +classDef EncoderCLS fill:#27E1E8,stroke:#277CE8,stroke-width:1px; +classDef IndexerCLS fill:#27E1E8,stroke:#277CE8,stroke-width:1px; +classDef RouterCLS fill:#2BFFCB,stroke:#277CE8,stroke-width:1px; +classDef PreprocessorCLS fill:#27E1E8,stroke:#277CE8,stroke-width:1px; +class gRPCFrontend000 gRPCFrontendCLS; +class Preprocessor100 PreprocessorCLS; +class Encoder200 EncoderCLS; +class Indexer300 IndexerCLS; +
+
+
+
+
+
+ +

This is a Bash script generated from your YAML config. + You can use it to start a GNES server on a local machine.

+
+

1. Install GNES via pip install gnes
+ 2. Create a new file say run.sh
+ 3. Copy the following content to it and run it via bash ./run.sh.

+
+
+
+ Shell script +
+
+ +
+                    
+#!/usr/bin/env bash
+
+## Prerequisites of this script
+## You need to install GNES on this local machine
+## pip install gnes
+
+set -e
+
+trap 'kill $(jobs -p)' EXIT
+
+printf "starting service gRPCFrontend with 1 replicas...\n"
+gnes frontend --grpc_port 5566 --port_out 60724 --socket_out PUSH_BIND --port_in 56269 --socket_in PULL_CONNECT  &
+printf "starting service Preprocessor with 1 replicas...\n"
+gnes preprocess --yaml_path text-prep.yaml --port_in 60724 --socket_in PULL_CONNECT --port_out 59459 --socket_out PUSH_BIND  &
+printf "starting service Encoder with 1 replicas...\n"
+gnes encode --yaml_path gpt2.yml --port_in 59459 --socket_in PULL_CONNECT --port_out 52124 --socket_out PUSH_BIND  &
+printf "starting service Indexer with 1 replicas...\n"
+gnes index --yaml_path b-indexer.yml --port_in 52124 --socket_in PULL_CONNECT --port_out 56269 --socket_out PUSH_BIND  &
+
+wait
+                    
+                
+
+
+
+ +
+
+ +

This is a docker-compose YAML file generated from your YAML config. + You can use it to start a Docker Swarm distributed on multiple machines.

+
+

1. Install Docker and Docker Swarm
+ 2. Create a new file say my-compose.yml
+ 3. Copy the following content to it
+ 4. Run docker stack deploy --compose-file my-compose.yml.

+
+
+
+ Docker-Swarm/Compose config +
+
+ +
+                    
+version: '3.4'
+services:
+  gRPCFrontend00:
+    image: gnes/gnes:latest
+    command: frontend --grpc_port 5566 --port_out 60724 --socket_out PUSH_BIND --port_in
+      56269 --socket_in PULL_CONNECT --host_in Indexer30
+    ports:
+    - 5566:5566
+  Preprocessor10:
+    image: gnes/gnes:latest
+    command: preprocess --yaml_path text-prep.yaml --port_in 60724 --socket_in PULL_CONNECT
+      --port_out 59459 --socket_out PUSH_BIND --yaml_path /Preprocessor10_yaml --host_in
+      gRPCFrontend00
+    configs:
+    - Preprocessor10_yaml
+  Encoder20:
+    image: gnes/gnes:latest
+    command: encode --yaml_path gpt2.yml --port_in 59459 --socket_in PULL_CONNECT
+      --port_out 52124 --socket_out PUSH_BIND --yaml_path /Encoder20_yaml --host_in
+      Preprocessor10
+    configs:
+    - Encoder20_yaml
+  Indexer30:
+    image: gnes/gnes:latest
+    command: index --yaml_path b-indexer.yml --port_in 52124 --socket_in PULL_CONNECT
+      --port_out 56269 --socket_out PUSH_BIND --yaml_path /Indexer30_yaml --host_in
+      Encoder20
+    configs:
+    - Indexer30_yaml
+volumes: {}
+networks:
+  gnes-net:
+    driver: overlay
+    attachable: true
+configs:
+  Preprocessor10_yaml:
+    file: text-prep.yaml
+  Encoder20_yaml:
+    file: gpt2.yml
+  Indexer30_yaml:
+    file: b-indexer.yml
+
+                    
+                
+
+
+
+ +
+
+

The generation of Kubernetes config is currently under development.

+
+
+
Kubernetes config
+
+ +
+                    
+{{gnes-k8s}}
+                    
+                
+
+
+
+
+ + +
+ +
+
+ + + + + + + + + + + + + \ No newline at end of file diff --git a/gnes/preprocessor/image/base.py b/gnes/preprocessor/image/base.py index e2e72d56..f41ad8a3 100644 --- a/gnes/preprocessor/image/base.py +++ b/gnes/preprocessor/image/base.py @@ -37,3 +37,16 @@ def _torch_transform(cls, image): import torchvision.transforms as transforms return transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])(image) + + @classmethod + def _get_offset_nd(cls, image, index, chunk): + pass + + def _get_all_subarea(self, image): + from itertools import product + x_list = [0, image.size[0] / 3, 2 * image.size[0] / 3, image.size[0]] + y_list = [0, image.size[1] / 3, 2 * image.size[1] / 3, image.size[1]] + + index = [[x, y, x + 1, y + 1] for [x, y] in product(range(len(x_list) - 1), range(len(y_list) - 1))] + all_subareas = [[x_list[idx[0]], y_list[idx[1]], x_list[idx[2]], y_list[idx[3]]] for idx in index] + return all_subareas, index \ No newline at end of file diff --git a/gnes/preprocessor/image/segmentation.py b/gnes/preprocessor/image/segmentation.py index 5b9fa958..a8256986 100644 --- a/gnes/preprocessor/image/segmentation.py +++ b/gnes/preprocessor/image/segmentation.py @@ -18,6 +18,7 @@ import numpy as np from PIL import Image +from typing import List from .base import BaseImagePreprocessor from ...proto import array2blob @@ -52,6 +53,7 @@ def apply(self, doc: 'gnes_pb2.Document'): super().apply(doc) if doc.raw_bytes: original_image = Image.open(io.BytesIO(doc.raw_bytes)) + all_subareas, index = self._get_all_subarea(original_image) image_tensor = self._torch_transform(original_image) if self._use_cuda: image_tensor = image_tensor.cuda() @@ -68,6 +70,7 @@ def apply(self, doc: 'gnes_pb2.Document'): c.doc_id = doc.doc_id c.blob.CopyFrom(array2blob(self._crop_image_reshape(original_image, ele[0]))) c.offset_1d = ci + c.offset_nd.x.extend(self._get_offset_nd(all_subareas, index, ele[0])) c.weight = self._cal_area(ele[0]) / (original_image.size[0] * original_image.size[1]) c = doc.chunks.add() @@ -75,6 +78,7 @@ def apply(self, doc: 'gnes_pb2.Document'): c.blob.CopyFrom(array2blob(np.array(original_image.resize((self.target_img_size, self.target_img_size))))) c.offset_1d = len(chunks) + c.offset_nd.x.extend([100, 100]) c.weight = 1. 
else: self.logger.error('bad document: "raw_bytes" is empty!') @@ -83,6 +87,25 @@ def _crop_image_reshape(self, original_image, coordinates): return np.array(original_image.crop(coordinates).resize((self.target_img_size, self.target_img_size))) + def _get_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], chunk: List[int]) -> List[int]: + iou_list = [self._cal_iou(area, chunk) for area in all_subareas] + return index[int(np.argmax(iou_list))][:2] + @classmethod - def _cal_area(cls, coordinate): + def _cal_area(cls, coordinate: List[int]): return (coordinate[2] - coordinate[0]) * (coordinate[3] - coordinate[1]) + + def _cal_iou(self, image: List[int], chunk: List[int]) -> float: + chunk_area = self._cal_area(chunk) + image_area = self._cal_area(image) + + x1 = max(chunk[0], image[0]) + y1 = max(chunk[1], image[1]) + x2 = min(chunk[2], image[2]) + y2 = min(chunk[3], image[3]) + + overlap_area = max(0, x2 - x1) * max(0, y2 - y1) + + iou = overlap_area / (chunk_area + image_area - overlap_area) + + return iou diff --git a/gnes/preprocessor/image/sliding_window.py b/gnes/preprocessor/image/sliding_window.py index 3a32d447..42530a60 100644 --- a/gnes/preprocessor/image/sliding_window.py +++ b/gnes/preprocessor/image/sliding_window.py @@ -40,8 +40,9 @@ def __init__(self, window_size: int = 64, def apply(self, doc: 'gnes_pb2.Document'): super().apply(doc) if doc.raw_bytes: - img = np.array(Image.open(io.BytesIO(doc.raw_bytes))) - image_set = self._get_all_sliding_window(img) + original_image = Image.open(io.BytesIO(doc.raw_bytes)) + all_subareas, index = self._get_all_subarea(original_image) + image_set, center_point_list = self._get_all_sliding_window(np.array(original_image)) normalizaed_image_set = [np.array(self._torch_transform(img)).transpose(1, 2, 0) for img in image_set] weight = self._get_all_chunks_weight(normalizaed_image_set) @@ -51,11 +52,12 @@ def apply(self, doc: 'gnes_pb2.Document'): c.doc_id = doc.doc_id c.blob.CopyFrom(array2blob(ele[0])) c.offset_1d = ci + c.offset_nd.x.extend(self._get_offset_nd(all_subareas, index, center_point_list[ci])) c.weight = ele[1] else: self.logger.error('bad document: "raw_bytes" is empty!') - def _get_all_sliding_window(self, img: 'np.ndarray') -> List['np.ndarray']: + def _get_all_sliding_window(self, img: 'np.ndarray'): extend_height = self.window_size - (img.shape[0]) % self.stride_height extend_wide = self.window_size - (img.shape[1]) % self.stride_wide @@ -81,9 +83,37 @@ def _get_all_sliding_window(self, img: 'np.ndarray') -> List['np.ndarray']: ), writeable=False ) + center_point_list = [ + [self.window_size / 2 + x * self.stride_wide, self.window_size / 2 + y * self.stride_height] + for x in range(expanded_input.shape[0]) + for y in range(expanded_input.shape[1])] + expanded_input = expanded_input.reshape((-1, self.window_size, self.window_size, 3)) return [np.array(Image.fromarray(img).resize((self.target_img_size, self.target_img_size))) for img in - expanded_input] + expanded_input], center_point_list + + def _get_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], center_point: List[float]) -> List[ + int]: + location_list = self._get_location(all_subareas, center_point) + location = [i for i in range(len(location_list)) if location_list[i] is True][0] + return index[location][:2] + + @classmethod + def _get_location(cls, all_subareas: List[List[int]], center_point: List[float]) -> List[bool]: + location_list = [] + x_boundary = max([x[1] for x in all_subareas]) + y_boundary = max([y[3] for y in 
all_subareas]) + for area in all_subareas: + if center_point[0] in range(int(area[0]), int(area[2])) and center_point[1] in range(int(area[1]), + int(area[3])): + location_list.append(True) + elif center_point[0] in range(int(area[0]), int(area[2])) and y_boundary == area[3] and center_point[1] > y_boundary: + location_list.append(True) + elif center_point[1] in range(int(area[1]), int(area[3])) and x_boundary == area[2] and center_point[0] > x_boundary: + location_list.append(True) + else: + location_list.append(False) + return location_list class VanillaSlidingPreprocessor(BaseSlidingPreprocessor): diff --git a/tests/yaml/base-segmentation-image-prep.yml b/tests/yaml/base-segmentation-image-prep.yml index bd81db6f..f36c3b55 100644 --- a/tests/yaml/base-segmentation-image-prep.yml +++ b/tests/yaml/base-segmentation-image-prep.yml @@ -5,4 +5,5 @@ parameter: target_img_size: 224 _use_cuda: false gnes_config: + name: fasterRCNN is_trained: true \ No newline at end of file From 753c148e34ad016aff50fcab366511491782f33b Mon Sep 17 00:00:00 2001 From: Jem Date: Tue, 23 Jul 2019 23:45:38 +0800 Subject: [PATCH 06/12] feat(image preprocessor): calculate offsetnd for each chunk --- gnes-board.html | 464 ------------------------------------------------ 1 file changed, 464 deletions(-) delete mode 100644 gnes-board.html diff --git a/gnes-board.html b/gnes-board.html deleted file mode 100644 index ed3171d0..00000000 --- a/gnes-board.html +++ /dev/null @@ -1,464 +0,0 @@ - - - - - - - - - GNES Board - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - \ No newline at end of file From b441c57d1027b3ce368a7e4a8c2ac8ef7779957d Mon Sep 17 00:00:00 2001 From: Jem Date: Tue, 23 Jul 2019 23:50:26 +0800 Subject: [PATCH 07/12] feat(image preprocessor): calculate offsetnd for each chunk --- gnes/preprocessor/image/sliding_window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnes/preprocessor/image/sliding_window.py b/gnes/preprocessor/image/sliding_window.py index 42530a60..4ff0ad49 100644 --- a/gnes/preprocessor/image/sliding_window.py +++ b/gnes/preprocessor/image/sliding_window.py @@ -106,7 +106,7 @@ def _get_location(cls, all_subareas: List[List[int]], center_point: List[float]) for area in all_subareas: if center_point[0] in range(int(area[0]), int(area[2])) and center_point[1] in range(int(area[1]), int(area[3])): - location_list.append(True) + location_list.append(True) elif center_point[0] in range(int(area[0]), int(area[2])) and y_boundary == area[3] and center_point[1] > y_boundary: location_list.append(True) elif center_point[1] in range(int(area[1]), int(area[3])) and x_boundary == area[2] and center_point[0] > x_boundary: From d9792c6de16a8b16bd41042df119079cb5cbe362 Mon Sep 17 00:00:00 2001 From: Jem Date: Tue, 23 Jul 2019 23:54:37 +0800 Subject: [PATCH 08/12] feat(image preprocessor): calculate offsetnd for each chunk --- gnes/preprocessor/image/sliding_window.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gnes/preprocessor/image/sliding_window.py b/gnes/preprocessor/image/sliding_window.py index 4ff0ad49..42ba9cb4 100644 --- a/gnes/preprocessor/image/sliding_window.py +++ b/gnes/preprocessor/image/sliding_window.py @@ -92,8 +92,7 @@ def _get_all_sliding_window(self, img: 'np.ndarray'): return [np.array(Image.fromarray(img).resize((self.target_img_size, self.target_img_size))) for img in expanded_input], center_point_list - def _get_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], center_point: List[float]) -> List[ - int]: + def _get_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], center_point: List[float]) -> List[int]: location_list = self._get_location(all_subareas, center_point) location = [i for i in range(len(location_list)) if location_list[i] is True][0] return index[location][:2] @@ -106,7 +105,7 @@ def _get_location(cls, all_subareas: List[List[int]], center_point: List[float]) for area in all_subareas: if center_point[0] in range(int(area[0]), int(area[2])) and center_point[1] in range(int(area[1]), int(area[3])): - location_list.append(True) + location_list.append(True) elif center_point[0] in range(int(area[0]), int(area[2])) and y_boundary == area[3] and center_point[1] > y_boundary: location_list.append(True) elif center_point[1] in range(int(area[1]), int(area[3])) and x_boundary == area[2] and center_point[0] > x_boundary: From b1eaa87e46be85f678fcd8bbb56313ca8b45368d Mon Sep 17 00:00:00 2001 From: Jem Date: Wed, 24 Jul 2019 10:29:14 +0800 Subject: [PATCH 09/12] feat(image preprocessor): calculate offsetnd for each chunk --- gnes/preprocessor/image/base.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/gnes/preprocessor/image/base.py b/gnes/preprocessor/image/base.py index f41ad8a3..d1e86f94 100644 --- a/gnes/preprocessor/image/base.py +++ b/gnes/preprocessor/image/base.py @@ -39,10 +39,7 @@ def _torch_transform(cls, image): transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])(image) @classmethod - def _get_offset_nd(cls, image, index, chunk): - pass - - def 
_get_all_subarea(self, image): + def _get_all_subarea(cls, image): from itertools import product x_list = [0, image.size[0] / 3, 2 * image.size[0] / 3, image.size[0]] y_list = [0, image.size[1] / 3, 2 * image.size[1] / 3, image.size[1]] From 9b422278d2d43cabe5484c1c6024dc172fbbf79b Mon Sep 17 00:00:00 2001 From: Jem Date: Wed, 24 Jul 2019 10:35:16 +0800 Subject: [PATCH 10/12] feat(image preprocessor): calculate offsetnd for each chunk --- gnes/preprocessor/image/segmentation.py | 4 ++-- gnes/preprocessor/image/sliding_window.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gnes/preprocessor/image/segmentation.py b/gnes/preprocessor/image/segmentation.py index a8256986..f6c2bf05 100644 --- a/gnes/preprocessor/image/segmentation.py +++ b/gnes/preprocessor/image/segmentation.py @@ -70,7 +70,7 @@ def apply(self, doc: 'gnes_pb2.Document'): c.doc_id = doc.doc_id c.blob.CopyFrom(array2blob(self._crop_image_reshape(original_image, ele[0]))) c.offset_1d = ci - c.offset_nd.x.extend(self._get_offset_nd(all_subareas, index, ele[0])) + c.offset_nd.x.extend(self._get_seg_offset_nd(all_subareas, index, ele[0])) c.weight = self._cal_area(ele[0]) / (original_image.size[0] * original_image.size[1]) c = doc.chunks.add() @@ -87,7 +87,7 @@ def _crop_image_reshape(self, original_image, coordinates): return np.array(original_image.crop(coordinates).resize((self.target_img_size, self.target_img_size))) - def _get_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], chunk: List[int]) -> List[int]: + def _get_seg_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], chunk: List[int]) -> List[int]: iou_list = [self._cal_iou(area, chunk) for area in all_subareas] return index[int(np.argmax(iou_list))][:2] diff --git a/gnes/preprocessor/image/sliding_window.py b/gnes/preprocessor/image/sliding_window.py index 42ba9cb4..ff181893 100644 --- a/gnes/preprocessor/image/sliding_window.py +++ b/gnes/preprocessor/image/sliding_window.py @@ -52,7 +52,7 @@ def apply(self, doc: 'gnes_pb2.Document'): c.doc_id = doc.doc_id c.blob.CopyFrom(array2blob(ele[0])) c.offset_1d = ci - c.offset_nd.x.extend(self._get_offset_nd(all_subareas, index, center_point_list[ci])) + c.offset_nd.x.extend(self._get_slid_offset_nd(all_subareas, index, center_point_list[ci])) c.weight = ele[1] else: self.logger.error('bad document: "raw_bytes" is empty!') @@ -92,7 +92,7 @@ def _get_all_sliding_window(self, img: 'np.ndarray'): return [np.array(Image.fromarray(img).resize((self.target_img_size, self.target_img_size))) for img in expanded_input], center_point_list - def _get_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], center_point: List[float]) -> List[int]: + def _get_slid_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], center_point: List[float]) -> List[int]: location_list = self._get_location(all_subareas, center_point) location = [i for i in range(len(location_list)) if location_list[i] is True][0] return index[location][:2] From 04c9c74556be8dd343de1cdb6375dc744d4da531 Mon Sep 17 00:00:00 2001 From: Jem Date: Wed, 24 Jul 2019 11:00:40 +0800 Subject: [PATCH 11/12] feat(image preprocessor): calculate offsetnd for each chunk --- gnes/preprocessor/image/base.py | 8 ++++---- gnes/preprocessor/image/segmentation.py | 6 ++---- gnes/preprocessor/image/sliding_window.py | 4 ++-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/gnes/preprocessor/image/base.py b/gnes/preprocessor/image/base.py index d1e86f94..a557cbe9 100644 --- 
a/gnes/preprocessor/image/base.py +++ b/gnes/preprocessor/image/base.py @@ -32,14 +32,14 @@ def __init__(self, def _get_all_chunks_weight(self, image_set: List['np.ndarray']) -> List[float]: pass - @classmethod - def _torch_transform(cls, image): + @staticmethod + def _torch_transform(image): import torchvision.transforms as transforms return transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])(image) - @classmethod - def _get_all_subarea(cls, image): + @staticmethod + def _get_all_subarea(image): from itertools import product x_list = [0, image.size[0] / 3, 2 * image.size[0] / 3, image.size[0]] y_list = [0, image.size[1] / 3, 2 * image.size[1] / 3, image.size[1]] diff --git a/gnes/preprocessor/image/segmentation.py b/gnes/preprocessor/image/segmentation.py index f6c2bf05..b5a6392f 100644 --- a/gnes/preprocessor/image/segmentation.py +++ b/gnes/preprocessor/image/segmentation.py @@ -91,8 +91,8 @@ def _get_seg_offset_nd(self, all_subareas: List[List[int]], index: List[List[int iou_list = [self._cal_iou(area, chunk) for area in all_subareas] return index[int(np.argmax(iou_list))][:2] - @classmethod - def _cal_area(cls, coordinate: List[int]): + @staticmethod + def _cal_area(coordinate: List[int]): return (coordinate[2] - coordinate[0]) * (coordinate[3] - coordinate[1]) def _cal_iou(self, image: List[int], chunk: List[int]) -> float: @@ -105,7 +105,5 @@ def _cal_iou(self, image: List[int], chunk: List[int]) -> float: y2 = min(chunk[3], image[3]) overlap_area = max(0, x2 - x1) * max(0, y2 - y1) - iou = overlap_area / (chunk_area + image_area - overlap_area) - return iou diff --git a/gnes/preprocessor/image/sliding_window.py b/gnes/preprocessor/image/sliding_window.py index ff181893..bef92f53 100644 --- a/gnes/preprocessor/image/sliding_window.py +++ b/gnes/preprocessor/image/sliding_window.py @@ -97,8 +97,8 @@ def _get_slid_offset_nd(self, all_subareas: List[List[int]], index: List[List[in location = [i for i in range(len(location_list)) if location_list[i] is True][0] return index[location][:2] - @classmethod - def _get_location(cls, all_subareas: List[List[int]], center_point: List[float]) -> List[bool]: + @staticmethod + def _get_location(all_subareas: List[List[int]], center_point: List[float]) -> List[bool]: location_list = [] x_boundary = max([x[1] for x in all_subareas]) y_boundary = max([y[3] for y in all_subareas]) From 2c6964837c4bc7387b0585d26b4f8ab5f80d3909 Mon Sep 17 00:00:00 2001 From: Larry Yan Date: Wed, 24 Jul 2019 13:07:06 +0800 Subject: [PATCH 12/12] fix(indexer): fix weight in indexer call --- gnes/proto/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gnes/proto/__init__.py b/gnes/proto/__init__.py index 7d4e0b9d..058768fd 100644 --- a/gnes/proto/__init__.py +++ b/gnes/proto/__init__.py @@ -34,6 +34,7 @@ def index(data: List[bytes], batch_size: int = 0, *args, **kwargs): for raw_bytes in pi: d = req.index.docs.add() d.raw_bytes = raw_bytes + d.weight = 1.0 yield req @staticmethod
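For context on PATCH 01: the new drop_raw_bytes / drop_chunk_blob flags let LVDBIndexer strip bulky payloads before a document is serialized into LevelDB. Below is a minimal sketch of that trimming step, assuming gnes is installed; the proto fields used (raw_bytes, chunks, blob, doc_id, offset_1d) are taken from the diff, while strip_payload is a hypothetical helper standing in for the logic inside add(), not part of the patch.

    import pickle

    from gnes.proto import gnes_pb2


    def strip_payload(doc: 'gnes_pb2.Document',
                      drop_raw_bytes: bool = True,
                      drop_chunk_blob: bool = True) -> bytes:
        # mirrors the add() loop in the patch: optionally drop the raw payload and
        # every chunk's blob, then serialize, so only light metadata reaches LevelDB
        if drop_raw_bytes:
            doc.raw_bytes = b''
        if drop_chunk_blob:
            for c in doc.chunks:
                c.ClearField('blob')
        return doc.SerializeToString()


    doc = gnes_pb2.Document()
    doc.doc_id = 1
    doc.raw_bytes = b'\x00' * 1024          # stand-in for an encoded image
    c = doc.chunks.add()
    c.doc_id = doc.doc_id
    c.offset_1d = 0

    key, value = pickle.dumps(doc.doc_id), strip_payload(doc)
    print(len(value))                        # far smaller than with raw_bytes kept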
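For context on PATCH 04: CVAEEncoder now returns the latent mean, the variance, or their concatenation, and can optionally L2-normalize each row. Below is a pure-numpy sketch of that post-processing; select_and_normalize is a hypothetical helper, and the real encoder obtains the mean/variance batches from the TF session. Note that the MEAN_VAR branch in the diff passes axis=1 to list.append rather than to np.concatenate; the sketch uses what appears to be the intended call.

    import numpy as np


    def select_and_normalize(mean: np.ndarray, var: np.ndarray,
                             select_method: str = 'MEAN',
                             l2_normalize: bool = False) -> np.ndarray:
        # pick the latent statistics to keep for this batch
        if select_method == 'MEAN':
            v = mean
        elif select_method == 'VAR':
            v = var
        elif select_method == 'MEAN_VAR':
            v = np.concatenate([mean, var], axis=1)
        else:
            raise ValueError('unknown select_method: %s' % select_method)
        v = v.astype(np.float32)
        if l2_normalize:
            # same expression as the patch: divide each row by its L2 norm
            v = v / (v ** 2).sum(axis=1, keepdims=True) ** 0.5
        return v


    batch_mean = np.random.rand(4, 300)
    batch_var = np.random.rand(4, 300)
    out = select_and_normalize(batch_mean, batch_var, 'MEAN_VAR', l2_normalize=True)
    print(out.shape, np.linalg.norm(out, axis=1))   # (4, 600), every row has unit norm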
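For context on PATCHes 05-11: a chunk's offset_nd is the (x, y) cell of a 3x3 grid laid over the image, chosen as the subarea with the highest IoU against the chunk's bounding box (segmentation path) or as the cell containing the window's center point (sliding-window path). Below is a standalone sketch of the IoU-based mapping, assuming boxes are given as [x1, y1, x2, y2] in pixels; the function names mirror the patched helpers, but this is an illustrative re-implementation rather than the module itself.

    from itertools import product
    from typing import List, Tuple

    import numpy as np


    def get_all_subareas(width: int, height: int) -> Tuple[List[List[float]], List[List[int]]]:
        # split the image into a 3x3 grid; return the subarea boxes and their grid indices
        x_list = [0, width / 3, 2 * width / 3, width]
        y_list = [0, height / 3, 2 * height / 3, height]
        index = [[x, y, x + 1, y + 1] for x, y in product(range(3), range(3))]
        subareas = [[x_list[i[0]], y_list[i[1]], x_list[i[2]], y_list[i[3]]] for i in index]
        return subareas, index


    def cal_area(box: List[float]) -> float:
        return (box[2] - box[0]) * (box[3] - box[1])


    def cal_iou(a: List[float], b: List[float]) -> float:
        # intersection-over-union of two axis-aligned boxes
        x1, y1 = max(a[0], b[0]), max(a[1], b[1])
        x2, y2 = min(a[2], b[2]), min(a[3], b[3])
        overlap = max(0, x2 - x1) * max(0, y2 - y1)
        return overlap / (cal_area(a) + cal_area(b) - overlap)


    def get_offset_nd(width: int, height: int, chunk_box: List[float]) -> List[int]:
        # offset_nd is the grid index of the subarea overlapping the chunk the most
        subareas, index = get_all_subareas(width, height)
        iou_list = [cal_iou(area, chunk_box) for area in subareas]
        return index[int(np.argmax(iou_list))][:2]


    # a region in the lower-right third of a 300x300 image lands in grid cell [2, 2]
    print(get_offset_nd(300, 300, [220, 210, 290, 280]))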