feat(service): add ServiceManager and enable parallel services in one…

… container
gnes-ai · Aug 8, 2019 · ccfd474 · ccfd474
1 parent c2a9a80
commit ccfd474
Show file tree

Hide file tree

Showing 15 changed files with 239 additions and 45 deletions.
diff --git a/gnes/cli/parser.py b/gnes/cli/parser.py
@@ -106,7 +106,7 @@ def set_composer_flask_parser(parser=None):
 
 
 def set_service_parser(parser=None):
-    from ..service.base import SocketType, BaseService
+    from ..service.base import SocketType, BaseService, ParallelType
     import random
     if not parser:
         parser = set_base_parser()
@@ -134,8 +134,15 @@ def set_service_parser(parser=None):
     parser.add_argument('--read_only', action='store_true', default=False,
                         help='do not allow the service to modify the model, '
                              'dump_interval will be ignored')
-    parser.add_argument('--concurrency_backend', type=str, choices=['thread', 'process'], default='thread',
-                        help='concurrency backend of the service')
+    parser.add_argument('--parallel_backend', type=str, choices=['thread', 'process'], default='thread',
+                        help='parallel backend of the service')
+    parser.add_argument('--num_parallel', type=int, default=1,
+                        help='number of parallel services running at the same time, '
+                             '`port_in` and `port_out` will be set to random, '
+                             'and routers will be added automatically when necessary')
+    parser.add_argument('--parallel_type', type=ParallelType.from_string, choices=list(ParallelType),
+                        default=ParallelType.PUSH_NONBLOCK,
+                        help='parallel type of the concurrent services')
     return parser
 
 
@@ -172,7 +179,6 @@ def set_preprocessor_service_parser(parser=None):
     if not parser:
         parser = set_base_parser()
     set_loadable_service_parser(parser)
-
     parser.set_defaults(read_only=True)
     return parser
 
@@ -181,6 +187,8 @@ def set_router_service_parser(parser=None):
     if not parser:
         parser = set_base_parser()
     set_loadable_service_parser(parser)
+    parser.add_argument('--num_part', type=int, default=1,
+                        help='explicitly set the number of parts of message')
     parser.set_defaults(read_only=True)
     return parser
 

diff --git a/gnes/encoder/audio/mfcc.py b/gnes/encoder/audio/mfcc.py
@@ -15,10 +15,12 @@
 
 # pylint: disable=low-comment-ratio
 
+from typing import List
+
+import numpy as np
+
 from ..base import BaseAudioEncoder
 from ...helper import batching
-import numpy as np
-from typing import List
 
 
 class MfccEncoder(BaseAudioEncoder):
@@ -38,9 +40,8 @@ def encode(self, data: List['np.array'], *args, **kwargs) -> np.ndarray:
 
         max_lenth = max([len(mf) for mf in mfccs])
 
-
         mfccs = [np.concatenate((mf, np.zeros((max_lenth - mf.shape[0], self.n_mfcc), dtype=np.float32)), axis=0)
                  if mf.shape[0] < max_lenth else mf for mf in mfccs]
         mfccs = [mfcc.reshape((1, -1)) for mfcc in mfccs]
         mfccs = np.squeeze(np.array(mfccs), axis=1)
-        return mfccs
+        return mfccs
diff --git a/gnes/encoder/numeric/vlad.py b/gnes/encoder/numeric/vlad.py
@@ -16,8 +16,10 @@
 # pylint: disable=low-comment-ratio
 
 
-import numpy as np
 import copy
+
+import numpy as np
+
 from ..base import BaseNumericEncoder
 from ...helper import batching, train_required
 

diff --git a/gnes/encoder/video/mixture_core/model.py b/gnes/encoder/video/mixture_core/model.py
@@ -14,6 +14,7 @@
 #  limitations under the License.
 
 import math
+
 import tensorflow as tf
 import tensorflow.contrib.slim as slim
 
@@ -52,11 +53,11 @@ def __init__(self, feature_size,
 
     @staticmethod
     def rand_init(feature_size):
-        return tf.random_normal_initializer(stddev=1/math.sqrt(feature_size))
+        return tf.random_normal_initializer(stddev=1 / math.sqrt(feature_size))
 
     def build_model(self):
         self.feeds = tf.placeholder(tf.float32, [None, None, self.input_size])
-        #self.inputs = self.feeds
+        # self.inputs = self.feeds
         self.inputs = tf.layers.dense(self.feeds, self.feature_size)
         self.weights = tf.placeholder(tf.float32, [None, self.vocab_size])
         self.max_frames = tf.shape(self.inputs)[1]
@@ -83,7 +84,7 @@ def build_fvnet(self):
 
         covar_weights = tf.square(covar_weights)
         eps = tf.constant([1e-6])
-        covar_weights = tf.add(covar_weights,eps)
+        covar_weights = tf.add(covar_weights, eps)
 
         tf.summary.histogram("cluster_weights", cluster_weights)
         activation = tf.matmul(reshaped_input, cluster_weights)
@@ -111,7 +112,7 @@ def build_fvnet(self):
 
         a = tf.multiply(a_sum, cluster_weights2)
 
-        activation = tf.transpose(activation,perm=[0, 2, 1])
+        activation = tf.transpose(activation, perm=[0, 2, 1])
 
         reshaped_input = tf.reshape(reshaped_input,
                                     [-1, self.max_frames, self.feature_size])
@@ -131,15 +132,15 @@ def build_fvnet(self):
         fv2 = tf.divide(fv2, tf.square(covar_weights))
         fv2 = tf.subtract(fv2, a_sum)
 
-        fv2 = tf.reshape(fv2, [-1, self.cluster_size*self.feature_size])
+        fv2 = tf.reshape(fv2, [-1, self.cluster_size * self.feature_size])
         fv2 = tf.nn.l2_normalize(fv2, 1)
-        fv2 = tf.reshape(fv2, [-1, self.cluster_size*self.feature_size])
+        fv2 = tf.reshape(fv2, [-1, self.cluster_size * self.feature_size])
         fv2 = tf.nn.l2_normalize(fv2, 1)
 
         fv1 = tf.subtract(fv1, a)
         fv1 = tf.divide(fv1, covar_weights)
         fv1 = tf.nn.l2_normalize(fv1, 1)
-        fv1 = tf.reshape(fv1, [-1, self.cluster_size*self.feature_size])
+        fv1 = tf.reshape(fv1, [-1, self.cluster_size * self.feature_size])
         fv1 = tf.nn.l2_normalize(fv1, 1)
 
         self.repre = tf.concat([fv1, fv2], 1)
@@ -197,9 +198,9 @@ def build_loss(self):
         logits = tf.cast(self.label, tf.float32)
         if self.use_weights:
             logits = logits * self.weights
-        self.loss = - tf.log(tf.reduce_sum(logits * self.probabilities, axis=1)+1e-9)
+        self.loss = - tf.log(tf.reduce_sum(logits * self.probabilities, axis=1) + 1e-9)
         self.loss = tf.reduce_mean(self.loss)
-        self.pred =tf.argmax(self.probabilities, 1)
+        self.pred = tf.argmax(self.probabilities, 1)
         self.avg_diff = tf.cast(tf.equal(tf.argmax(self.label, 1), self.pred), tf.float32)
         self.avg_diff = tf.reduce_mean(self.avg_diff)
 
@@ -230,7 +231,7 @@ def build_loss(self):
                 self.probabilities2 = tf.nn.softmax(self.probabilities2)
 
             self.loss += tf.reduce_mean(-tf.log(
-                         tf.reduce_sum(logits2*self.probabilities2, axis=1)+1e-9))
+                tf.reduce_sum(logits2 * self.probabilities2, axis=1) + 1e-9))
             self.pred2 = tf.argmax(self.probabilities2, 1)
             self.avg_diff2 = tf.cast(tf.equal(tf.argmax(self.label_2, 1), self.pred2), tf.float32)
             self.avg_diff2 = tf.reduce_mean(self.avg_diff2)
@@ -242,4 +243,3 @@ def build_loss(self):
         self.eval_res = {'loss': self.loss, 'avg_diff': self.avg_diff}
         if self.use_2nd_label:
             self.eval_res['avg_diff2'] = self.avg_diff2
-
diff --git a/gnes/preprocessor/audio/audio_vanilla.py b/gnes/preprocessor/audio/audio_vanilla.py
@@ -13,8 +13,9 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 
-from .base import BaseAudioPreprocessor
 import numpy as np
+
+from .base import BaseAudioPreprocessor
 from ..helper import get_video_length_from_raw, get_audio
 from ...proto import array2blob
 
@@ -43,4 +44,4 @@ def apply(self, doc: 'gnes_pb2.Document') -> None:
             else:
                 self.logger.info('bad document: no audio extracted')
         else:
-            self.logger.error('bad document: "raw_bytes" is empty!')
+            self.logger.error('bad document: "raw_bytes" is empty!')
diff --git a/gnes/preprocessor/helper.py b/gnes/preprocessor/helper.py
@@ -15,14 +15,15 @@
 
 # pylint: disable=low-comment-ratio
 
+import datetime
 import io
+import os
 import subprocess as sp
+from datetime import timedelta
 from typing import List, Callable
-import os
+
 import cv2
 import numpy as np
-import datetime
-from datetime import timedelta
 from PIL import Image
 
 from ..helper import set_logger
@@ -32,12 +33,13 @@
 
 def get_video_length(video_path):
     import re
-    process = sp.Popen(['ffmpeg',  '-i', video_path],
+    process = sp.Popen(['ffmpeg', '-i', video_path],
                        stdout=sp.PIPE,
                        stderr=sp.STDOUT)
     stdout, _ = process.communicate()
     stdout = str(stdout)
-    matches = re.search(r"Duration:\s{1}(?P<hours>\d+?):(?P<minutes>\d+?):(?P<seconds>\d+\.\d+?),", stdout, re.DOTALL).groupdict()
+    matches = re.search(r"Duration:\s{1}(?P<hours>\d+?):(?P<minutes>\d+?):(?P<seconds>\d+\.\d+?),", stdout,
+                        re.DOTALL).groupdict()
     h = float(matches['hours'])
     m = float(matches['minutes'])
     s = float(matches['seconds'])
@@ -120,7 +122,9 @@ def split_mp4_random(video_path, avg_length, max_clip_second=10):
     prefix = os.path.basename(video_path).replace('.mp4', '')
     for i in range(num_part):
         i_len = len(ts_group[i])
-        cmd = 'ffmpeg' + ''.join(ts_group[i]) + '-filter_complex "{}concat=n={}:v=1:a=1" -strict -2 {}_{}.mp4 -y'.format(''.join(['[{}]'.format(k) for k in range(i_len)]), i_len, prefix, i)
+        cmd = 'ffmpeg' + ''.join(
+            ts_group[i]) + '-filter_complex "{}concat=n={}:v=1:a=1" -strict -2 {}_{}.mp4 -y'.format(
+            ''.join(['[{}]'.format(k) for k in range(i_len)]), i_len, prefix, i)
         os.system(cmd)
 
 
@@ -174,7 +178,8 @@ def get_video_frames(buffer_data: bytes, image_format: str = 'cv2',
                     cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                     frames.append(image)
                 except Exception as e:
-                    logger.warning("The decoded cv2 image from keyframe buffer can not be converted to RGB: %s" % str(e))
+                    logger.warning(
+                        "The decoded cv2 image from keyframe buffer can not be converted to RGB: %s" % str(e))
         else:
             logger.error("The image format [%s] is not supported so far!" % image_format)
             raise NotImplementedError

diff --git a/gnes/preprocessor/image/segmentation.py b/gnes/preprocessor/image/segmentation.py
@@ -15,10 +15,10 @@
 
 import io
 import os
+from typing import List
 
 import numpy as np
 from PIL import Image
-from typing import List
 
 from .base import BaseImagePreprocessor
 from ...proto import array2blob

diff --git a/gnes/preprocessor/image/sliding_window.py b/gnes/preprocessor/image/sliding_window.py
@@ -90,7 +90,8 @@ def _get_all_sliding_window(self, img: 'np.ndarray'):
         return [np.array(Image.fromarray(img).resize((self.target_img_size, self.target_img_size))) for img in
                 expanded_input], center_point_list
 
-    def _get_slid_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], center_point: List[float]) -> List[int]:
+    def _get_slid_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], center_point: List[float]) -> \
+    List[int]:
         location_list = self._get_location(all_subareas, center_point)
         location = [i for i in range(len(location_list)) if location_list[i] is True][0]
         return index[location][:2]
@@ -104,9 +105,11 @@ def _get_location(all_subareas: List[List[int]], center_point: List[float]) -> L
             if center_point[0] in range(int(area[0]), int(area[2])) and center_point[1] in range(int(area[1]),
                                                                                                  int(area[3])):
                 location_list.append(True)
-            elif center_point[0] in range(int(area[0]), int(area[2])) and y_boundary == area[3] and center_point[1] > y_boundary:
+            elif center_point[0] in range(int(area[0]), int(area[2])) and y_boundary == area[3] and center_point[
+                1] > y_boundary:
                 location_list.append(True)
-            elif center_point[1] in range(int(area[1]), int(area[3])) and x_boundary == area[2] and center_point[0] > x_boundary:
+            elif center_point[1] in range(int(area[1]), int(area[3])) and x_boundary == area[2] and center_point[
+                0] > x_boundary:
                 location_list.append(True)
             else:
                 location_list.append(False)

diff --git a/gnes/preprocessor/video/ffmpeg.py b/gnes/preprocessor/video/ffmpeg.py
@@ -131,13 +131,13 @@ def apply(self, doc: 'gnes_pb2.Document') -> None:
                     if self.segment_interval == -1:
                         sub_videos = [frames]
                     else:
-                        sub_videos = [frames[_: _+self.segment_interval]
+                        sub_videos = [frames[_: _ + self.segment_interval]
                                       for _ in range(0, len(frames), self.segment_interval)]
                 # cut by num: should specify how many chunks for each doc
                 elif self.segment_method == 'cut_by_num':
                     if self.segment_num >= 2:
-                        _interval = int(len(frames)/self.segment_num)
-                        sub_videos = [frames[_: _+_interval]
+                        _interval = int(len(frames) / self.segment_num)
+                        sub_videos = [frames[_: _ + _interval]
                                       for _ in range(0, len(frames), _interval)]
                     else:
                         sub_videos = [frames]

diff --git a/gnes/router/base.py b/gnes/router/base.py
@@ -36,4 +36,6 @@ def apply(self, msg: 'gnes_pb2.Message', accum_msgs: List['gnes_pb2.Message'], *
         if len(msg.envelope.num_part) > 1:
             msg.envelope.num_part.pop()
         else:
-            self.logger.error('can not reduce the message further, as num_part="%s"' % msg.envelope.num_part)
+            self.logger.warning(
+                'message envelope says num_part=%s, means no further message reducing. '
+                'ignore this if you explicitly set "num_part" in RouterService' % msg.envelope.num_part)