From 3901078ced9c0fcf451111e9376bd3dca50cb6d5 Mon Sep 17 00:00:00 2001 From: felix Date: Fri, 11 Oct 2019 11:40:46 +0800 Subject: [PATCH 1/6] feat(incep_v4_encoder): add inception v4 encoder for video --- gnes/encoder/__init__.py | 1 + gnes/encoder/video/inception.py | 84 +++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 gnes/encoder/video/inception.py diff --git a/gnes/encoder/__init__.py b/gnes/encoder/__init__.py index 84be4e7c..4ab16022 100644 --- a/gnes/encoder/__init__.py +++ b/gnes/encoder/__init__.py @@ -45,6 +45,7 @@ 'VggishEncoder': 'audio.vggish', 'YouTube8MFeatureExtractor': 'video.yt8m_feature_extractor', 'YouTube8MEncoder': 'video.yt8m_model', + 'InceptionVideoEncoder': 'video.inception', 'QuantizerEncoder': 'numeric.quantizer', 'CharEmbeddingEncoder': 'text.char' } diff --git a/gnes/encoder/video/inception.py b/gnes/encoder/video/inception.py new file mode 100644 index 00000000..23962a3d --- /dev/null +++ b/gnes/encoder/video/inception.py @@ -0,0 +1,84 @@ +# Tencent is pleased to support the open source community by making GNES available. +# +# Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List + +import numpy as np +from PIL import Image + +from ..base import BaseVideoEncoder +from ...helper import batching, get_first_available_gpu + + +class InceptionVideoEncoder(BaseVideoEncoder): + batch_size = 64 + + def __init__(self, + model_dir: str, + select_layer: str = 'PreLogitsFlatten', + *args, + **kwargs): + super().__init__(*args, **kwargs) + self.model_dir = model_dir + self.select_layer = select_layer + self.inception_size_x = 299 + self.inception_size_y = 299 + + def post_init(self): + import tensorflow as tf + from ..image.inception_cores.inception_v4 import inception_v4 + from ..image.inception_cores.inception_utils import inception_arg_scope + from .mixture_core.model import NetFV + import os + os.environ['CUDA_VISIBLE_DEVICES'] = str(get_first_available_gpu()) + + g = tf.Graph() + with g.as_default(): + arg_scope = inception_arg_scope() + inception_v4.default_image_size = self.inception_size_x + self.inputs = tf.placeholder( + tf.float32, + (None, self.inception_size_x, self.inception_size_y, 3)) + + with tf.contrib.slim.arg_scope(arg_scope): + self.logits, self.end_points = inception_v4( + self.inputs, is_training=False, dropout_keep_prob=1.0) + + config = tf.ConfigProto(log_device_placement=False) + if self.on_gpu: + config.gpu_options.allow_growth = True + self.sess = tf.Session(config=config) + self.saver = tf.train.Saver() + self.saver.restore(self.sess, self.model_dir) + + def encode(self, data: List['np.ndarray'], *args, + **kwargs) -> List['np.ndarray']: + v_len = [len(v) for v in data] + pos_start = [0] + [sum(v_len[:i + 1]) for i in range(len(v_len) - 1)] + pos_end = [sum(v_len[:i + 1]) for i in range(len(v_len))] + + _resize = lambda x: (np.array(Image.fromarray(x).resize((self.inception_size_x, self.inception_size_y)), dtype=np.float32) * 2 / 255. - 1.) + + images = [_resize(im) for v in data for im in v] + + @batching + def _encode(self, data): + _, end_points_ = self.sess.run((self.logits, self.end_points), + feed_dict={self.inputs: data}) + return end_points_[self.select_layer] + + encodes = _encode(images).astype(np.float32) + + return [encodes[s:e].copy() for s, e in zip(pos_start, pos_end)] From 628b24295dd71bc80a30484927a2c06358d39b08 Mon Sep 17 00:00:00 2001 From: felix Date: Fri, 11 Oct 2019 15:15:46 +0800 Subject: [PATCH 2/6] add debug log --- gnes/encoder/video/inception.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gnes/encoder/video/inception.py b/gnes/encoder/video/inception.py index 23962a3d..c5c8be05 100644 --- a/gnes/encoder/video/inception.py +++ b/gnes/encoder/video/inception.py @@ -69,7 +69,9 @@ def encode(self, data: List['np.ndarray'], *args, pos_start = [0] + [sum(v_len[:i + 1]) for i in range(len(v_len) - 1)] pos_end = [sum(v_len[:i + 1]) for i in range(len(v_len))] - _resize = lambda x: (np.array(Image.fromarray(x).resize((self.inception_size_x, self.inception_size_y)), dtype=np.float32) * 2 / 255. - 1.) + def _resize(x): + self.logger.info('image shape: %s' % str(x.shape)) + return np.array(Image.fromarray(x).resize((self.inception_size_x, self.inception_size_y)), dtype=np.float32) * 2 / 255. - 1.) images = [_resize(im) for v in data for im in v] From 6adc053f23c6f1dfe8d6234affa7a11b475af4f1 Mon Sep 17 00:00:00 2001 From: felix Date: Fri, 11 Oct 2019 15:18:45 +0800 Subject: [PATCH 3/6] add debug log --- gnes/encoder/video/inception.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnes/encoder/video/inception.py b/gnes/encoder/video/inception.py index c5c8be05..dab1e603 100644 --- a/gnes/encoder/video/inception.py +++ b/gnes/encoder/video/inception.py @@ -71,7 +71,7 @@ def encode(self, data: List['np.ndarray'], *args, def _resize(x): self.logger.info('image shape: %s' % str(x.shape)) - return np.array(Image.fromarray(x).resize((self.inception_size_x, self.inception_size_y)), dtype=np.float32) * 2 / 255. - 1.) + return np.array(Image.fromarray(x).resize((self.inception_size_x, self.inception_size_y)), dtype=np.float32) * 2 / 255. - 1. images = [_resize(im) for v in data for im in v] From 4a5e98422be79141e25d800edb45656f2447c1aa Mon Sep 17 00:00:00 2001 From: felix Date: Fri, 11 Oct 2019 15:26:51 +0800 Subject: [PATCH 4/6] fix error --- gnes/encoder/video/inception.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/gnes/encoder/video/inception.py b/gnes/encoder/video/inception.py index dab1e603..4588cbbf 100644 --- a/gnes/encoder/video/inception.py +++ b/gnes/encoder/video/inception.py @@ -69,9 +69,7 @@ def encode(self, data: List['np.ndarray'], *args, pos_start = [0] + [sum(v_len[:i + 1]) for i in range(len(v_len) - 1)] pos_end = [sum(v_len[:i + 1]) for i in range(len(v_len))] - def _resize(x): - self.logger.info('image shape: %s' % str(x.shape)) - return np.array(Image.fromarray(x).resize((self.inception_size_x, self.inception_size_y)), dtype=np.float32) * 2 / 255. - 1. + _resize = lambda x: np.array(Image.fromarray(x).resize((self.inception_size_x, self.inception_size_y)), dtype=np.float32) * 2 / 255. - 1. images = [_resize(im) for v in data for im in v] @@ -81,6 +79,6 @@ def _encode(self, data): feed_dict={self.inputs: data}) return end_points_[self.select_layer] - encodes = _encode(images).astype(np.float32) + encodes = _encode(self, images).astype(np.float32) return [encodes[s:e].copy() for s, e in zip(pos_start, pos_end)] From 2705c287ec0bab49c8da06732851d22fcb21bac9 Mon Sep 17 00:00:00 2001 From: felix Date: Fri, 11 Oct 2019 20:22:08 +0800 Subject: [PATCH 5/6] fix(video-decoder): none chunk spliter --- gnes/preprocessor/video/video_decoder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gnes/preprocessor/video/video_decoder.py b/gnes/preprocessor/video/video_decoder.py index e20a8a60..bf734bcf 100644 --- a/gnes/preprocessor/video/video_decoder.py +++ b/gnes/preprocessor/video/video_decoder.py @@ -60,6 +60,8 @@ def apply(self, doc: 'gnes_pb2.Document') -> None: c.blob.CopyFrom(array2blob(frame)) c.offset = i c.weight = 1.0 + elif self.chunk_spliter == 'none': + pass elif self.chunk_spliter == 'shot': raise NotImplementedError else: From a591b89395cad500c2824721e75293d989b7b04f Mon Sep 17 00:00:00 2001 From: felix Date: Fri, 11 Oct 2019 20:40:59 +0800 Subject: [PATCH 6/6] remove unused codes --- gnes/encoder/video/inception.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gnes/encoder/video/inception.py b/gnes/encoder/video/inception.py index 4588cbbf..56c76a61 100644 --- a/gnes/encoder/video/inception.py +++ b/gnes/encoder/video/inception.py @@ -40,7 +40,6 @@ def post_init(self): import tensorflow as tf from ..image.inception_cores.inception_v4 import inception_v4 from ..image.inception_cores.inception_utils import inception_arg_scope - from .mixture_core.model import NetFV import os os.environ['CUDA_VISIBLE_DEVICES'] = str(get_first_available_gpu())