From 92500f0f1451914a7c68efdd71158b8cd03103c1 Mon Sep 17 00:00:00 2001
From: Larry Yan <stevenjobsbsbsb@hotmail.com>
Date: Wed, 31 Jul 2019 19:59:09 +0800
Subject: [PATCH 1/9] fix(encoder): add netvlad and netfv

---
 gnes/encoder/video/incep_mixture.py         |  96 ++++++++
 gnes/encoder/video/mixture_core/__init__.py |   0
 gnes/encoder/video/mixture_core/model.py    | 245 ++++++++++++++++++++
 3 files changed, 341 insertions(+)
 create mode 100644 gnes/encoder/video/incep_mixture.py
 create mode 100644 gnes/encoder/video/mixture_core/__init__.py
 create mode 100644 gnes/encoder/video/mixture_core/model.py

diff --git a/gnes/encoder/video/incep_mixture.py b/gnes/encoder/video/incep_mixture.py
new file mode 100644
index 00000000..052960b3
--- /dev/null
+++ b/gnes/encoder/video/incep_mixture.py
@@ -0,0 +1,96 @@
+#  Tencent is pleased to support the open source community by making GNES available.
+#
+#  Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from typing import List
+
+import numpy as np
+from PIL import Image
+
+from ..base import BaseVideoEncoder
+from ...helper import batching, batch_iterator, get_first_available_gpu
+
+
+class IncepMixtureEncoder(BaseVideoEncoder):
+
+    def __init__(self, model_dir_inception: str,
+                 model_dir_mixture: str,
+                 batch_size: int = 64,
+                 select_layer: str = 'PreLogitsFlatten',
+                 use_cuda: bool = False,
+                 feature_size: int = 300,
+                 vocab_size: int = 28,
+                 cluster_size: int = 256,
+                 method: str = 'netvlad',
+                 input_size: int = 1536,
+                 multitask_method: str = 'Attention'
+                 *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.model_dir_inception = model_dir_inception
+        self.model_dir_mixture = model_dir_mixture
+        self.batch_size = batch_size
+        self.select_layer = select_layer
+        self.use_cuda = use_cuda
+        self.cluster_size = cluster_size
+        self.feature_size = feature_size
+        self.vocab_size = vocab_size
+        self.method = method
+        self.input_size = input_size
+        self.multitask_method = multitask_method
+
+    def post_init(self):
+        import tensorflow as tf
+        from ..image.inception_cores.inception_v4 import inception_v4
+        from ..image.inception_cores.inception_utils import inception_arg_scope
+        from .mixture_core.incep_mixture import *
+        import os
+        os.environ['CUDA_VISIBLE_DEVICES'] = str(get_first_available_gpu())
+
+        g = tf.Graph()
+        with g.as_default():
+            arg_scope = inception_arg_scope()
+            inception_v4.default_image_size = self.inception_size_x
+            self.inputs = tf.placeholder(tf.float32, (None,
+                                                      self.inception_size_x,
+                                                      self.inception_size_y, 3))
+
+            with tf.contrib.slim.arg_scope(arg_scope):
+                self.logits, self.end_points = inception_v4(self.inputs,
+                                                            is_training=False,
+                                                            dropout_keep_prob=1.0)
+
+            config = tf.ConfigProto(log_device_placement=False)
+            if self._use_cuda:
+                config.gpu_options.allow_growth = True
+            self.sess = tf.Session(config=config)
+            self.saver = tf.train.Saver()
+            self.saver.restore(self.sess, self.model_dir_inception)
+
+        g2 = tf.Graph()
+        with g2.as_default():
+            config = tf.ConfigProto(log_device_placement=False)
+            if self._use_cuda:
+                config.gpu_options.allow_growth = True
+            self.sess2 = tf.Session(config=config)
+            self.mix_model = NetFV(feature_size=self.feature_size,
+                                   cluster_size=self.cluster_size,
+                                   vocab_size=self.vocab_size,
+                                   input_size=self.input_size,
+                                   use_2nd_label=True,
+                                   multitask_method=self.multitask_method,
+                                   method=self.method,
+                                   is_training=False)
+            saver = tf.train.Saver(max_to_keep=1)
+            self.sess2.run(tf.global_variables_initializer())
+            saver.restore(self.sess2, self.model_dir_mixture)
diff --git a/gnes/encoder/video/mixture_core/__init__.py b/gnes/encoder/video/mixture_core/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/gnes/encoder/video/mixture_core/model.py b/gnes/encoder/video/mixture_core/model.py
new file mode 100644
index 00000000..6a4a0631
--- /dev/null
+++ b/gnes/encoder/video/mixture_core/model.py
@@ -0,0 +1,297 @@
+#  Tencent is pleased to support the open source community by making GNES available.
+#
+#  Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import math
+import tensorflow as tf
+import tensorflow.contrib.slim as slim
+
+
+class NetFV():
+    """Aggregate per-frame features with NetFV, NetVLAD or max-pooling and classify them.
+
+    Creating an instance adds every placeholder, variable and op to the current
+    default TensorFlow graph: ``build_model`` produces the aggregated
+    representation ``self.repre`` and ``build_loss`` adds the classification
+    head(s), the loss and the training op on top of it.
+
+    :param feature_size: width the fed frame features are projected to
+    :param cluster_size: number of clusters for the 'fvnet' and 'netvlad' methods
+    :param vocab_size: number of classes of the first label
+    :param method: aggregation method: 'fvnet', 'netvlad' or 'pooling'
+    :param input_size: width of the fed frame features, defaults to ``feature_size``
+    :param use_2nd_label: add a second classification head when True
+    :param vocab_size_2: number of classes of the second label
+    :param add_batch_norm: batch-normalise the cluster activations instead of adding a bias
+    :param is_training: forwarded to ``slim.batch_norm``
+    :param use_weights: scale the first label by the ``self.weights`` placeholder in the loss
+    :param save_dir: accepted but not used by this class
+    :param multitask_method: how the second head is built: ``None`` or 'Attention'
+    :param l2_penalty: stored on the instance, not used while building the graph
+    """
+
+    def __init__(self, feature_size,
+                 cluster_size,
+                 vocab_size,
+                 method='netvlad',
+                 input_size=None,
+                 use_2nd_label=False,
+                 vocab_size_2=None,
+                 add_batch_norm=True,
+                 is_training=False,
+                 use_weights=True,
+                 save_dir=None,
+                 multitask_method=None,
+                 l2_penalty=1e-6):
+        # identity comparison is the correct test for the None sentinel
+        self.input_size = feature_size if input_size is None else input_size
+        self.feature_size = feature_size
+        self.is_training = is_training
+        self.vocab_size = vocab_size
+        self.use_2nd_label = use_2nd_label
+        self.vocab_size_2 = vocab_size_2
+        self.add_batch_norm = add_batch_norm
+        self.cluster_size = cluster_size
+        self.use_weights = use_weights
+        self.l2_penalty = l2_penalty
+        self.method = method
+        self.multitask_method = multitask_method
+        self.build_model()
+        self.build_loss()
+
+    @staticmethod
+    def rand_init(feature_size):
+        """Return a normal initializer with stddev ``1 / sqrt(feature_size)``."""
+        return tf.random_normal_initializer(stddev=1/math.sqrt(feature_size))
+
+    def build_model(self):
+        """Create the input placeholders and the aggregated representation ``self.repre``.
+
+        :raises ValueError: if ``self.method`` is not a supported aggregation method
+        """
+        self.feeds = tf.placeholder(tf.float32, [None, None, self.input_size])
+        # project the fed features from input_size to feature_size
+        self.inputs = tf.layers.dense(self.feeds, self.feature_size)
+        self.weights = tf.placeholder(tf.float32, [None, self.vocab_size])
+        self.max_frames = tf.shape(self.inputs)[1]
+        if self.method == 'fvnet':
+            self.build_fvnet()
+        elif self.method == 'netvlad':
+            self.build_netvlad()
+        elif self.method == 'pooling':
+            self.build_pooling()
+        else:
+            # fail here rather than with an AttributeError on self.repre in build_loss
+            raise ValueError('unknown method "%s", expected "fvnet", "netvlad" or "pooling"'
+                             % self.method)
+
+    def build_pooling(self):
+        """Set ``self.repre`` to a dense projection of the inputs max-reduced over axis 1."""
+        self.repre = tf.layers.dense(self.inputs, self.feature_size)
+        self.repre = tf.reduce_max(self.repre, axis=1)
+
+    def build_fvnet(self):
+        """Set ``self.repre`` to a dense projection of the concatenated ``fv1`` and ``fv2``."""
+        reshaped_input = tf.reshape(self.inputs, [-1, self.feature_size])
+        cluster_weights = tf.get_variable("cluster_weights",
+                                          [self.feature_size, self.cluster_size],
+                                          initializer=NetFV.rand_init(self.feature_size))
+
+        covar_weights = tf.get_variable("covar_weights",
+                                        [self.feature_size, self.cluster_size],
+                                        initializer=NetFV.rand_init(self.feature_size))
+
+        # squared plus a small constant, so the divisions below never hit zero
+        covar_weights = tf.square(covar_weights)
+        eps = tf.constant([1e-6])
+        covar_weights = tf.add(covar_weights, eps)
+
+        tf.summary.histogram("cluster_weights", cluster_weights)
+        activation = tf.matmul(reshaped_input, cluster_weights)
+        if self.add_batch_norm:
+            activation = slim.batch_norm(activation,
+                                         center=True,
+                                         scale=True,
+                                         is_training=self.is_training,
+                                         scope="cluster_bn")
+        else:
+            cluster_biases = tf.get_variable("cluster_biases",
+                                             [self.cluster_size],
+                                             initializer=NetFV.rand_init(self.feature_size))
+            tf.summary.histogram("cluster_biases", cluster_biases)
+            activation += cluster_biases
+
+        # soft assignment of every frame to the clusters
+        activation = tf.nn.softmax(activation)
+        tf.summary.histogram("cluster_output", activation)
+
+        activation = tf.reshape(activation, [-1, self.max_frames, self.cluster_size])
+
+        a_sum = tf.reduce_sum(activation, -2, keepdims=True)
+
+        cluster_weights2 = tf.scalar_mul(0.01, cluster_weights)
+
+        a = tf.multiply(a_sum, cluster_weights2)
+
+        activation = tf.transpose(activation, perm=[0, 2, 1])
+
+        reshaped_input = tf.reshape(reshaped_input,
+                                    [-1, self.max_frames, self.feature_size])
+        fv1 = tf.matmul(activation, reshaped_input)
+
+        fv1 = tf.transpose(fv1, perm=[0, 2, 1])
+
+        # computing second order FV
+        a2 = tf.multiply(a_sum, tf.square(cluster_weights2))
+
+        b2 = tf.multiply(fv1, cluster_weights2)
+        fv2 = tf.matmul(activation, tf.square(reshaped_input))
+
+        fv2 = tf.transpose(fv2, perm=[0, 2, 1])
+        fv2 = tf.add_n([a2, fv2, tf.scalar_mul(-2, b2)])
+
+        fv2 = tf.divide(fv2, tf.square(covar_weights))
+        fv2 = tf.subtract(fv2, a_sum)
+
+        # NOTE: the reshape/normalise pair is applied twice, kept as in the original graph
+        fv2 = tf.reshape(fv2, [-1, self.cluster_size*self.feature_size])
+        fv2 = tf.nn.l2_normalize(fv2, 1)
+        fv2 = tf.reshape(fv2, [-1, self.cluster_size*self.feature_size])
+        fv2 = tf.nn.l2_normalize(fv2, 1)
+
+        # finishing the first order term
+        fv1 = tf.subtract(fv1, a)
+        fv1 = tf.divide(fv1, covar_weights)
+        fv1 = tf.nn.l2_normalize(fv1, 1)
+        fv1 = tf.reshape(fv1, [-1, self.cluster_size*self.feature_size])
+        fv1 = tf.nn.l2_normalize(fv1, 1)
+
+        self.repre = tf.concat([fv1, fv2], 1)
+        self.repre = tf.layers.dense(self.repre, self.feature_size)
+
+    def build_netvlad(self):
+        """Set ``self.repre`` to the flattened, L2-normalised ``vlad`` tensor."""
+        reshaped_input = tf.reshape(self.inputs, [-1, self.feature_size])
+        cluster_weights = tf.get_variable("cluster_weights",
+                                          [self.feature_size, self.cluster_size],
+                                          initializer=NetFV.rand_init(self.feature_size))
+        activation = tf.matmul(reshaped_input, cluster_weights)
+        if self.add_batch_norm:
+            activation = slim.batch_norm(activation,
+                                         center=True,
+                                         scale=True,
+                                         is_training=self.is_training,
+                                         scope="cluster_bn")
+        else:
+            cluster_biases = tf.get_variable("cluster_biases",
+                                             [self.cluster_size],
+                                             initializer=NetFV.rand_init(self.feature_size))
+            activation += cluster_biases
+        activation = tf.nn.softmax(activation)
+        activation = tf.reshape(activation, [-1, self.max_frames, self.cluster_size])
+
+        # "keepdims" as in build_fvnet; "keep_dims" is the deprecated spelling
+        a_sum = tf.reduce_sum(activation, -2, keepdims=True)
+
+        cluster_weights2 = tf.get_variable("cluster_weights2",
+                                           [1, self.feature_size, self.cluster_size],
+                                           initializer=NetFV.rand_init(self.feature_size))
+
+        a = tf.multiply(a_sum, cluster_weights2)
+        activation = tf.transpose(activation, perm=[0, 2, 1])
+
+        reshaped_input = tf.reshape(reshaped_input,
+                                    [-1, self.max_frames, self.feature_size])
+        vlad = tf.matmul(activation, reshaped_input)
+        vlad = tf.transpose(vlad, perm=[0, 2, 1])
+        vlad = tf.subtract(vlad, a)
+
+        vlad = tf.nn.l2_normalize(vlad, 1)
+
+        vlad = tf.reshape(vlad, [-1, self.cluster_size * self.feature_size])
+        vlad = tf.nn.l2_normalize(vlad, 1)
+        self.repre = vlad
+
+    def build_loss(self):
+        """Add the classification head(s), the loss, accuracy metrics and the training op.
+
+        :raises ValueError: if the second head is requested without ``vocab_size_2`` or
+            with an unsupported ``multitask_method``
+        """
+        self.probabilities = tf.layers.dense(self.repre,
+                                             self.vocab_size,
+                                             activation=tf.nn.tanh)
+        self.probabilities = tf.layers.dense(self.probabilities, self.vocab_size)
+        self.probabilities = tf.nn.softmax(self.probabilities)
+
+        self.label = tf.placeholder(tf.int32, [None, self.vocab_size])
+        # the fed label (not network logits), optionally scaled per class
+        target = tf.cast(self.label, tf.float32)
+        if self.use_weights:
+            target = target * self.weights
+        # negative log of the probability mass on the labelled classes
+        self.loss = - tf.log(tf.reduce_sum(target * self.probabilities, axis=1)+1e-9)
+        self.loss = tf.reduce_mean(self.loss)
+        self.pred = tf.argmax(self.probabilities, 1)
+        self.avg_diff = tf.cast(tf.equal(tf.argmax(self.label, 1), self.pred), tf.float32)
+        self.avg_diff = tf.reduce_mean(self.avg_diff)
+
+        # add 2nd layer labels
+        if self.use_2nd_label:
+            if self.vocab_size_2 is None:
+                raise ValueError('"vocab_size_2" is required when "use_2nd_label" is True')
+            self.label_2 = tf.placeholder(tf.int32, [None, self.vocab_size_2])
+            target2 = tf.cast(self.label_2, tf.float32)
+
+            if self.multitask_method is None:
+                self.probabilities2 = tf.layers.dense(self.repre,
+                                                      self.vocab_size_2,
+                                                      activation=tf.nn.tanh)
+                self.probabilities2 = tf.layers.dense(self.probabilities2, self.vocab_size_2)
+                self.probabilities2 = tf.nn.softmax(self.probabilities2)
+
+            elif self.multitask_method == 'Attention':
+                # embed the first head's probabilities and feed them to the second head
+                self.x = tf.get_variable('emb',
+                                         shape=[self.vocab_size, self.feature_size],
+                                         dtype=tf.float32,
+                                         initializer=NetFV.rand_init(self.feature_size))
+                self.emb_label = tf.matmul(self.probabilities, self.x)
+                self.emb_concat = tf.concat([self.emb_label, self.repre], axis=1)
+                self.probabilities2 = tf.layers.dense(self.emb_concat,
+                                                      self.vocab_size_2,
+                                                      activation=tf.nn.tanh)
+                self.probabilities2 = tf.layers.dense(self.probabilities2,
+                                                      self.vocab_size_2)
+                self.probabilities2 = tf.nn.softmax(self.probabilities2)
+
+            else:
+                # fail here rather than with an AttributeError on self.probabilities2
+                raise ValueError('unknown multitask_method "%s", expected None or "Attention"'
+                                 % self.multitask_method)
+
+            self.loss += tf.reduce_mean(-tf.log(
+                         tf.reduce_sum(target2*self.probabilities2, axis=1)+1e-9))
+            self.pred2 = tf.argmax(self.probabilities2, 1)
+            self.avg_diff2 = tf.cast(tf.equal(tf.argmax(self.label_2, 1), self.pred2), tf.float32)
+            self.avg_diff2 = tf.reduce_mean(self.avg_diff2)
+
+        self.optimizer = tf.train.AdamOptimizer(learning_rate=0.0005,
+                                                epsilon=1e-08,
+                                                name='adam')
+        self.train_op = slim.learning.create_train_op(self.loss, self.optimizer)
+        self.eval_res = {'loss': self.loss, 'avg_diff': self.avg_diff}
+        if self.use_2nd_label:
+            self.eval_res['avg_diff2'] = self.avg_diff2
+

From 679915336a2d3d99041844717723e8a06dae5899 Mon Sep 17 00:00:00 2001
From: Larry Yan <stevenjobsbsbsb@hotmail.com>
Date: Wed, 31 Jul 2019 21:16:55 +0800
Subject: [PATCH 2/9] fix(encoder): add netvlad and netfv register class

---
 gnes/encoder/video/incep_mixture.py | 56 +++++++++++++++++++++++------
 1 file changed, 46 insertions(+), 10 deletions(-)

diff --git a/gnes/encoder/video/incep_mixture.py b/gnes/encoder/video/incep_mixture.py
index 052960b3..d3461b7d 100644
--- a/gnes/encoder/video/incep_mixture.py
+++ b/gnes/encoder/video/incep_mixture.py
@@ -18,11 +18,11 @@
 import numpy as np
 from PIL import Image
 
-from ..base import BaseVideoEncoder
-from ...helper import batching, batch_iterator, get_first_available_gpu
+from gnes.encoder.base import BaseImageEncoder
+from gnes.helper import batching, batch_iterator, get_first_available_gpu
 
 
-class IncepMixtureEncoder(BaseVideoEncoder):
+class IncepMixtureEncoder(BaseImageEncoder):
 
     def __init__(self, model_dir_inception: str,
                  model_dir_mixture: str,
@@ -32,9 +32,11 @@ def __init__(self, model_dir_inception: str,
                  feature_size: int = 300,
                  vocab_size: int = 28,
                  cluster_size: int = 256,
-                 method: str = 'netvlad',
+                 method: str = 'fvnet',
                  input_size: int = 1536,
-                 multitask_method: str = 'Attention'
+                 vocab_size_2: int = 174,
+                 max_frames: int = 30,
+                 multitask_method: str = 'Attention',
                  *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.model_dir_inception = model_dir_inception
@@ -48,12 +50,16 @@ def __init__(self, model_dir_inception: str,
         self.method = method
         self.input_size = input_size
         self.multitask_method = multitask_method
+        self.inception_size_x = 299
+        self.inception_size_y = 299
+        self.max_frames = max_frames
+        self.vocab_size_2 = vocab_size_2
 
     def post_init(self):
         import tensorflow as tf
-        from ..image.inception_cores.inception_v4 import inception_v4
-        from ..image.inception_cores.inception_utils import inception_arg_scope
-        from .mixture_core.incep_mixture import *
+        from gnes.encoder.image.inception_cores.inception_v4 import inception_v4
+        from gnes.encoder.image.inception_cores.inception_utils import inception_arg_scope
+        from gnes.encoder.video.mixture_core.model import NetFV
         import os
         os.environ['CUDA_VISIBLE_DEVICES'] = str(get_first_available_gpu())
 
@@ -71,7 +77,7 @@ def post_init(self):
                                                             dropout_keep_prob=1.0)
 
             config = tf.ConfigProto(log_device_placement=False)
-            if self._use_cuda:
+            if self.use_cuda:
                 config.gpu_options.allow_growth = True
             self.sess = tf.Session(config=config)
             self.saver = tf.train.Saver()
@@ -80,7 +86,7 @@ def post_init(self):
         g2 = tf.Graph()
         with g2.as_default():
             config = tf.ConfigProto(log_device_placement=False)
-            if self._use_cuda:
+            if self.use_cuda:
                 config.gpu_options.allow_growth = True
             self.sess2 = tf.Session(config=config)
             self.mix_model = NetFV(feature_size=self.feature_size,
@@ -88,9 +94,43 @@ def post_init(self):
                                    vocab_size=self.vocab_size,
                                    input_size=self.input_size,
                                    use_2nd_label=True,
+                                   vocab_size_2=self.vocab_size_2,
                                    multitask_method=self.multitask_method,
                                    method=self.method,
                                    is_training=False)
             saver = tf.train.Saver(max_to_keep=1)
             self.sess2.run(tf.global_variables_initializer())
             saver.restore(self.sess2, self.model_dir_mixture)
+
+    @batching
+    def encode(self, videos: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
+        ret = []
+        v_len = [len(v) for v in videos]
+        pos_start = [0] + [sum(v_len[:i]) for i in range(1, len(v_len)-1)]
+        pos_end = [sum(v_len[:i]) for i in range(1, len(v_len))]
+        max_len = min(max(v_len), self.max_frames)
+
+        img = [im for v in videos for im in v]
+        img = [(np.array(Image.fromarray(im).resize((self.inception_size_x,
+                                                     self.inception_size_y)), dtype=np.float32) * 2 / 255. - 1.) for im
+               in img]
+        # run the frames of all videos through Inception in mini-batches
+        for _im in batch_iterator(img, self.batch_size):
+            _, end_points_ = self.sess.run((self.logits, self.end_points),
+                                           feed_dict={self.inputs: _im})
+            ret.append(end_points_[self.select_layer])
+        v = [_ for vi in ret for _ in vi]
+
+        # regroup the frame features per video, then zero-pad / truncate to max_len frames
+        v_input = [v[s:e] for s, e in zip(pos_start, pos_end)]
+        v_input = [(vi + [[0.0]*self.input_size]*(max_len-len(vi)))[:max_len] for vi in v_input]
+        v_input = [np.array(vi, dtype=np.float32) for vi in v_input]
+
+        ret = []
+        for _vi in batch_iterator(v_input, self.batch_size):
+            # feed only the current mini-batch, not the whole input again
+            repre = self.sess2.run(self.mix_model.repre,
+                                   feed_dict={self.mix_model.feeds: _vi})
+            ret.append(repre)
+        # stack the mini-batches along the video axis: one row per video
+        return np.concatenate(ret, axis=0).astype(np.float32)

From 3fdf1c06e302a5c3e32d28de431e076802ff5c9e Mon Sep 17 00:00:00 2001
From: Larry Yan <stevenjobsbsbsb@hotmail.com>
Date: Wed, 31 Jul 2019 21:43:52 +0800
Subject: [PATCH 3/9] fix(encoder): fix mixture

---
 gnes/encoder/__init__.py          |  4 ++-
 gnes/encoder/base.py              |  6 +++++
 gnes/preprocessor/__init__.py     |  1 +
 gnes/preprocessor/video/ffmpeg.py | 42 +++++++++++++++++++++++++++++++
 4 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/gnes/encoder/__init__.py b/gnes/encoder/__init__.py
index e77f2411..94dbeefe 100644
--- a/gnes/encoder/__init__.py
+++ b/gnes/encoder/__init__.py
@@ -34,13 +34,15 @@
     'BaseEncoder': 'base',
     'BaseBinaryEncoder': 'base',
     'BaseTextEncoder': 'base',
+    'BaseVideoEncoder': 'base',
     'BaseNumericEncoder': 'base',
     'CompositionalEncoder': 'base',
     'PipelineEncoder': 'base',
     'HashEncoder': 'numeric.hash',
     'BasePytorchEncoder': 'image.base',
     'TFInceptionEncoder': 'image.inception',
-    'CVAEEncoder': 'image.cvae'
+    'CVAEEncoder': 'image.cvae',
+    'IncepMixtureEncoder': 'video.incep_mixture'
 }
 
 register_all_class(_cls2file_map, 'encoder')
diff --git a/gnes/encoder/base.py b/gnes/encoder/base.py
index 964e237e..24f15f3a 100644
--- a/gnes/encoder/base.py
+++ b/gnes/encoder/base.py
@@ -38,6 +38,12 @@ def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
         pass
 
 
+class BaseVideoEncoder(BaseEncoder):
+
+    def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
+        pass
+
+
 class BaseTextEncoder(BaseEncoder):
 
     def encode(self, text: List[str], *args, **kwargs) -> np.ndarray:
diff --git a/gnes/preprocessor/__init__.py b/gnes/preprocessor/__init__.py
index 6764e9b0..b248de3d 100644
--- a/gnes/preprocessor/__init__.py
+++ b/gnes/preprocessor/__init__.py
@@ -30,6 +30,7 @@
     'BaseUnaryPreprocessor': 'base',
     'BaseVideoPreprocessor': 'video.base',
     'FFmpegPreprocessor': 'video.ffmpeg',
+    'FFmpegVideoSegmentor': 'video.ffmpeg',
     'ShotDetectPreprocessor': 'video.shotdetect',
 }
 
diff --git a/gnes/preprocessor/video/ffmpeg.py b/gnes/preprocessor/video/ffmpeg.py
index c21439b9..70a74dd5 100644
--- a/gnes/preprocessor/video/ffmpeg.py
+++ b/gnes/preprocessor/video/ffmpeg.py
@@ -106,3 +106,62 @@ def duplicate_rm_hash(self,
                 ret.append((i, h))
 
         return [images[_[0]] for _ in ret]
+
+
+class FFmpegVideoSegmentor(BaseVideoPreprocessor):
+    def __init__(self,
+                 frame_size: str = "192*168",
+                 segment_method: str = 'uniform',
+                 segment_interval: int = -1,
+                 *args,
+                 **kwargs):
+        """Split the frames extracted from a video into chunks of consecutive frames.
+
+        :param frame_size: passed to ``get_video_frames`` as ``s``
+        :param segment_method: how frames are grouped into chunks; only 'uniform' is handled
+        :param segment_interval: number of frames per chunk; -1 (or any value below 1)
+            keeps all frames in a single chunk
+        """
+        super().__init__(*args, **kwargs)
+        self.frame_size = frame_size
+        self.segment_method = segment_method
+        self.segment_interval = segment_interval
+        self._ffmpeg_kwargs = kwargs
+
+    def apply(self, doc: 'gnes_pb2.Document') -> None:
+        """Extract frames from ``doc.raw_bytes`` and add them to ``doc`` as chunks.
+
+        Every chunk holds a run of consecutive frames as one uint8 blob; all
+        chunks of a document get the same weight.
+        """
+        super().apply(doc)
+        if doc.raw_bytes:
+            frames = get_video_frames(
+                doc.raw_bytes,
+                s=self.frame_size,
+                vsync=self._ffmpeg_kwargs.get("vsync", "vfr"),
+                vf=self._ffmpeg_kwargs.get("vf", "select=eq(pict_type\\,I)"))
+
+            sub_videos = []
+            if len(frames) >= 1:
+                if self.segment_method == 'uniform':
+                    # a non-positive interval cannot be used as a range step
+                    # (0 raises, a negative one yields no chunk): keep the whole video
+                    if self.segment_interval < 1:
+                        sub_videos = [frames]
+                    else:
+                        sub_videos = [frames[_: _+self.segment_interval]
+                                      for _ in range(0, len(frames), self.segment_interval)]
+                    for ci, chunk in enumerate(sub_videos):
+                        c = doc.chunks.add()
+                        c.doc_id = doc.doc_id
+                        c.blob.CopyFrom(array2blob(np.array(chunk, dtype=np.uint8)))
+                        c.offset_1d = ci
+                        c.weight = 1 / len(sub_videos)
+                else:
+                    self.logger.error('unknown segment_method "%s", no chunk is added'
+                                      % self.segment_method)
+            else:
+                self.logger.info('bad document: no key frames extracted')
+        else:
+            self.logger.error('bad document: "raw_bytes" is empty!')

From 95f03c56701f4a691f8412490f6dcf9565751da1 Mon Sep 17 00:00:00 2001
From: Larry Yan <stevenjobsbsbsb@hotmail.com>
Date: Thu, 1 Aug 2019 10:32:00 +0800
Subject: [PATCH 4/9] fix(encoder): fix bug in video mixture encoder

---
 gnes/encoder/video/incep_mixture.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gnes/encoder/video/incep_mixture.py b/gnes/encoder/video/incep_mixture.py
index d3461b7d..357b60cb 100644
--- a/gnes/encoder/video/incep_mixture.py
+++ b/gnes/encoder/video/incep_mixture.py
@@ -107,7 +107,7 @@ def encode(self, videos: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
         ret = []
         v_len = [len(v) for v in videos]
         pos_start = [0] + [sum(v_len[:i]) for i in range(1, len(v_len)-1)]
-        pos_end = [sum(v_len[:i]) for i in range(1, len(v_len))]
+        pos_end = [sum(v_len[:i]) for i in range(len(v_len))]
         max_len = min(max(v_len), self.max_frames)
 
         img = [im for v in videos for im in v]

From 17779676bd452b2679ba7e64918721cf85e0bec1 Mon Sep 17 00:00:00 2001
From: Larry Yan <stevenjobsbsbsb@hotmail.com>
Date: Thu, 1 Aug 2019 10:44:06 +0800
Subject: [PATCH 5/9] fix(encoder): fix mixture encoder

---
 gnes/encoder/video/incep_mixture.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gnes/encoder/video/incep_mixture.py b/gnes/encoder/video/incep_mixture.py
index 357b60cb..eac796e4 100644
--- a/gnes/encoder/video/incep_mixture.py
+++ b/gnes/encoder/video/incep_mixture.py
@@ -106,8 +106,8 @@ def post_init(self):
     def encode(self, videos: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
         ret = []
         v_len = [len(v) for v in videos]
-        pos_start = [0] + [sum(v_len[:i]) for i in range(1, len(v_len)-1)]
-        pos_end = [sum(v_len[:i]) for i in range(len(v_len))]
+        pos_start = [0] + [sum(v_len[:i+1]) for i in range(len(v_len)-1)]
+        pos_end = [sum(v_len[:i+1]) for i in range(len(v_len))]
         max_len = min(max(v_len), self.max_frames)
 
         img = [im for v in videos for im in v]

From d6a46fa6b6a931b4dc0e334f5a89bb66460a84b0 Mon Sep 17 00:00:00 2001
From: Larry Yan <stevenjobsbsbsb@hotmail.com>
Date: Thu, 1 Aug 2019 11:32:37 +0800
Subject: [PATCH 6/9] fix(encoder): fix import path for mixture encoder

---
 gnes/encoder/video/incep_mixture.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gnes/encoder/video/incep_mixture.py b/gnes/encoder/video/incep_mixture.py
index eac796e4..60a7710d 100644
--- a/gnes/encoder/video/incep_mixture.py
+++ b/gnes/encoder/video/incep_mixture.py
@@ -18,11 +18,11 @@
 import numpy as np
 from PIL import Image
 
-from gnes.encoder.base import BaseImageEncoder
-from gnes.helper import batching, batch_iterator, get_first_available_gpu
+from ..base import BaseVideoEncoder
+from ...helper import batching, batch_iterator, get_first_available_gpu
 
 
-class IncepMixtureEncoder(BaseImageEncoder):
+class IncepMixtureEncoder(BaseVideoEncoder):
 
     def __init__(self, model_dir_inception: str,
                  model_dir_mixture: str,
@@ -57,9 +57,9 @@ def __init__(self, model_dir_inception: str,
 
     def post_init(self):
         import tensorflow as tf
-        from gnes.encoder.image.inception_cores.inception_v4 import inception_v4
-        from gnes.encoder.image.inception_cores.inception_utils import inception_arg_scope
-        from gnes.encoder.video.mixture_core.model import NetFV
+        from ..image.inception_cores.inception_v4 import inception_v4
+        from ..image.inception_cores.inception_utils import inception_arg_scope
+        from .mixture_core.model import NetFV
         import os
         os.environ['CUDA_VISIBLE_DEVICES'] = str(get_first_available_gpu())
 

From 5b0fe7c6c3c5f2fa433c090fdad9c8c80d59f4df Mon Sep 17 00:00:00 2001
From: Larry Yan <stevenjobsbsbsb@hotmail.com>
Date: Thu, 1 Aug 2019 14:15:59 +0800
Subject: [PATCH 7/9] fix(preprocessor): fix FFmpegVideoSegmentor

---
 gnes/preprocessor/video/ffmpeg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gnes/preprocessor/video/ffmpeg.py b/gnes/preprocessor/video/ffmpeg.py
index 70a74dd5..30dca7a1 100644
--- a/gnes/preprocessor/video/ffmpeg.py
+++ b/gnes/preprocessor/video/ffmpeg.py
@@ -110,7 +110,7 @@ def duplicate_rm_hash(self,
 
 class FFmpegVideoSegmentor(BaseVideoPreprocessor):
     def __init__(self,
-                 frame_size: str = "192*168",
+                 frame_size: str = "299*299",
                  segment_method: str = 'uniform',
                  segment_interval: int = -1,
                  *args,

From 15eb50b4d1a85e3b3ddd8ceeb7e3fd0b2b45b428 Mon Sep 17 00:00:00 2001
From: Larry Yan <stevenjobsbsbsb@hotmail.com>
Date: Thu, 1 Aug 2019 14:29:30 +0800
Subject: [PATCH 8/9] fix(encoder): fix params in basevideo encoder

---
 gnes/encoder/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gnes/encoder/base.py b/gnes/encoder/base.py
index 24f15f3a..6fb45b7c 100644
--- a/gnes/encoder/base.py
+++ b/gnes/encoder/base.py
@@ -40,7 +40,7 @@ def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
 
 class BaseVideoEncoder(BaseEncoder):
 
-    def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
+    def encode(self, videos: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
         pass
 
 

From 46b5c94eea4973e3f1b2dcb05e85981f316a4ca3 Mon Sep 17 00:00:00 2001
From: Larry Yan <stevenjobsbsbsb@hotmail.com>
Date: Thu, 1 Aug 2019 14:52:31 +0800
Subject: [PATCH 9/9] fix(encoder): fix name for video encoder

---
 gnes/encoder/base.py                | 2 +-
 gnes/encoder/video/incep_mixture.py | 6 +++---
 gnes/helper.py                      | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gnes/encoder/base.py b/gnes/encoder/base.py
index 6fb45b7c..d21ad3aa 100644
--- a/gnes/encoder/base.py
+++ b/gnes/encoder/base.py
@@ -40,7 +40,7 @@ def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
 
 class BaseVideoEncoder(BaseEncoder):
 
-    def encode(self, videos: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
+    def encode(self, data: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
         pass
 
 
diff --git a/gnes/encoder/video/incep_mixture.py b/gnes/encoder/video/incep_mixture.py
index 60a7710d..2c5a8a3f 100644
--- a/gnes/encoder/video/incep_mixture.py
+++ b/gnes/encoder/video/incep_mixture.py
@@ -103,14 +103,14 @@ def post_init(self):
             saver.restore(self.sess2, self.model_dir_mixture)
 
     @batching
-    def encode(self, videos: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
+    def encode(self, data: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
         ret = []
-        v_len = [len(v) for v in videos]
+        v_len = [len(v) for v in data]
         pos_start = [0] + [sum(v_len[:i+1]) for i in range(len(v_len)-1)]
         pos_end = [sum(v_len[:i+1]) for i in range(len(v_len))]
         max_len = min(max(v_len), self.max_frames)
 
-        img = [im for v in videos for im in v]
+        img = [im for v in data for im in v]
         img = [(np.array(Image.fromarray(im).resize((self.inception_size_x,
                                                      self.inception_size_y)), dtype=np.float32) * 2 / 255. - 1.) for im
                in img]
diff --git a/gnes/helper.py b/gnes/helper.py
index 9758aa02..7e69b0bf 100644
--- a/gnes/helper.py
+++ b/gnes/helper.py
@@ -48,8 +48,8 @@ def get_first_available_gpu():
     try:
         import GPUtil
         r = GPUtil.getAvailable(order='random',
-                                maxMemory=0.1,
-                                maxLoad=0.1,
+                                maxMemory=0.5,
+                                maxLoad=0.5,
                                 limit=1)
         if r:
             return r[0]