From 92500f0f1451914a7c68efdd71158b8cd03103c1 Mon Sep 17 00:00:00 2001 From: Larry Yan Date: Wed, 31 Jul 2019 19:59:09 +0800 Subject: [PATCH 1/9] fix(encoder): add netvlad and netfv --- gnes/encoder/video/incep_mixture.py | 96 ++++++++ gnes/encoder/video/mixture_core/__init__.py | 0 gnes/encoder/video/mixture_core/model.py | 245 ++++++++++++++++++++ 3 files changed, 341 insertions(+) create mode 100644 gnes/encoder/video/incep_mixture.py create mode 100644 gnes/encoder/video/mixture_core/__init__.py create mode 100644 gnes/encoder/video/mixture_core/model.py diff --git a/gnes/encoder/video/incep_mixture.py b/gnes/encoder/video/incep_mixture.py new file mode 100644 index 00000000..052960b3 --- /dev/null +++ b/gnes/encoder/video/incep_mixture.py @@ -0,0 +1,96 @@ +# Tencent is pleased to support the open source community by making GNES available. +# +# Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List + +import numpy as np +from PIL import Image + +from ..base import BaseVideoEncoder +from ...helper import batching, batch_iterator, get_first_available_gpu + + +class IncepMixtureEncoder(BaseVideoEncoder): + + def __init__(self, model_dir_inception: str, + model_dir_mixture: str, + batch_size: int = 64, + select_layer: str = 'PreLogitsFlatten', + use_cuda: bool = False, + feature_size: int = 300, + vocab_size: int = 28, + cluster_size: int = 256, + method: str = 'netvlad', + input_size: int = 1536, + multitask_method: str = 'Attention' + *args, **kwargs): + super().__init__(*args, **kwargs) + self.model_dir_inception = model_dir_inception + self.model_dir_mixture = model_dir_mixture + self.batch_size = batch_size + self.select_layer = select_layer + self.use_cuda = use_cuda + self.cluster_size = cluster_size + self.feature_size = feature_size + self.vocab_size = vocab_size + self.method = method + self.input_size = input_size + self.multitask_method = multitask_method + + def post_init(self): + import tensorflow as tf + from ..image.inception_cores.inception_v4 import inception_v4 + from ..image.inception_cores.inception_utils import inception_arg_scope + from .mixture_core.incep_mixture import * + import os + os.environ['CUDA_VISIBLE_DEVICES'] = str(get_first_available_gpu()) + + g = tf.Graph() + with g.as_default(): + arg_scope = inception_arg_scope() + inception_v4.default_image_size = self.inception_size_x + self.inputs = tf.placeholder(tf.float32, (None, + self.inception_size_x, + self.inception_size_y, 3)) + + with tf.contrib.slim.arg_scope(arg_scope): + self.logits, self.end_points = inception_v4(self.inputs, + is_training=False, + dropout_keep_prob=1.0) + + config = tf.ConfigProto(log_device_placement=False) + if self._use_cuda: + config.gpu_options.allow_growth = True + self.sess = tf.Session(config=config) + self.saver = tf.train.Saver() + self.saver.restore(self.sess, self.model_dir_inception) + + g2 = tf.Graph() + with g2.as_default(): + config = tf.ConfigProto(log_device_placement=False) + if self._use_cuda: + config.gpu_options.allow_growth = True + self.sess2 = tf.Session(config=config) + self.mix_model = NetFV(feature_size=self.feature_size, + cluster_size=self.cluster_size, + vocab_size=self.vocab_size, + input_size=self.input_size, + use_2nd_label=True, + multitask_method=self.multitask_method, + method=self.method, + is_training=False) + saver = tf.train.Saver(max_to_keep=1) + self.sess2.run(tf.global_variables_initializer()) + saver.restore(self.sess2, self.model_dir_mixture) diff --git a/gnes/encoder/video/mixture_core/__init__.py b/gnes/encoder/video/mixture_core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/gnes/encoder/video/mixture_core/model.py b/gnes/encoder/video/mixture_core/model.py new file mode 100644 index 00000000..6a4a0631 --- /dev/null +++ b/gnes/encoder/video/mixture_core/model.py @@ -0,0 +1,245 @@ +# Tencent is pleased to support the open source community by making GNES available. +# +# Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import tensorflow as tf +import tensorflow.contrib.slim as slim + + +class NetFV(): + def __init__(self, feature_size, + cluster_size, + vocab_size, + method='netvlad', + input_size=None, + use_2nd_label=False, + vocab_size_2=None, + add_batch_norm=True, + is_training=False, + use_weights=True, + save_dir=None, + multitask_method=None, + l2_penalty=1e-6): + if input_size == None: + self.input_size = feature_size + else: + self.input_size = input_size + self.feature_size = feature_size + self.is_training = is_training + self.vocab_size = vocab_size + self.use_2nd_label = use_2nd_label + self.vocab_size_2 = vocab_size_2 + self.add_batch_norm = add_batch_norm + self.cluster_size = cluster_size + self.use_weights = use_weights + self.l2_penalty = l2_penalty + self.method = method + self.multitask_method = multitask_method + self.build_model() + self.build_loss() + + @staticmethod + def rand_init(feature_size): + return tf.random_normal_initializer(stddev=1/math.sqrt(feature_size)) + + def build_model(self): + self.feeds = tf.placeholder(tf.float32, [None, None, self.input_size]) + #self.inputs = self.feeds + self.inputs = tf.layers.dense(self.feeds, self.feature_size) + self.weights = tf.placeholder(tf.float32, [None, self.vocab_size]) + self.max_frames = tf.shape(self.inputs)[1] + if self.method == 'fvnet': + self.build_fvnet() + elif self.method == 'netvlad': + self.build_netvlad() + elif self.method == 'pooling': + self.build_pooling() + + def build_pooling(self): + self.repre = tf.layers.dense(self.inputs, self.feature_size) + self.repre = tf.reduce_max(self.repre, axis=1) + + def build_fvnet(self): + reshaped_input = tf.reshape(self.inputs, [-1, self.feature_size]) + cluster_weights = tf.get_variable("cluster_weights", + [self.feature_size, self.cluster_size], + initializer=NetFV.rand_init(self.feature_size)) + + covar_weights = tf.get_variable("covar_weights", + [self.feature_size, self.cluster_size], + initializer=NetFV.rand_init(self.feature_size)) + + covar_weights = tf.square(covar_weights) + eps = tf.constant([1e-6]) + covar_weights = tf.add(covar_weights,eps) + + tf.summary.histogram("cluster_weights", cluster_weights) + activation = tf.matmul(reshaped_input, cluster_weights) + if self.add_batch_norm: + activation = slim.batch_norm(activation, + center=True, + scale=True, + is_training=self.is_training, + scope="cluster_bn") + else: + cluster_biases = tf.get_variable("cluster_biases", + [self.cluster_size], + initializer=NetFV.rand_init(self.feature_size)) + tf.summary.histogram("cluster_biases", cluster_biases) + activation += cluster_biases + + activation = tf.nn.softmax(activation) + tf.summary.histogram("cluster_output", activation) + + activation = tf.reshape(activation, [-1, self.max_frames, self.cluster_size]) + + a_sum = tf.reduce_sum(activation, -2, keepdims=True) + + cluster_weights2 = tf.scalar_mul(0.01, cluster_weights) + + a = tf.multiply(a_sum, cluster_weights2) + + activation = tf.transpose(activation,perm=[0, 2, 1]) + + reshaped_input = tf.reshape(reshaped_input, + [-1, self.max_frames, self.feature_size]) + fv1 = tf.matmul(activation, reshaped_input) + + fv1 = tf.transpose(fv1, perm=[0, 2, 1]) + + # computing second order FV + a2 = tf.multiply(a_sum, tf.square(cluster_weights2)) + + b2 = tf.multiply(fv1, cluster_weights2) + fv2 = tf.matmul(activation, tf.square(reshaped_input)) + + fv2 = tf.transpose(fv2, perm=[0, 2, 1]) + fv2 = tf.add_n([a2, fv2, tf.scalar_mul(-2, b2)]) + + fv2 = tf.divide(fv2, tf.square(covar_weights)) + fv2 = tf.subtract(fv2, a_sum) + + fv2 = tf.reshape(fv2, [-1, self.cluster_size*self.feature_size]) + fv2 = tf.nn.l2_normalize(fv2, 1) + fv2 = tf.reshape(fv2, [-1, self.cluster_size*self.feature_size]) + fv2 = tf.nn.l2_normalize(fv2, 1) + + fv1 = tf.subtract(fv1, a) + fv1 = tf.divide(fv1, covar_weights) + fv1 = tf.nn.l2_normalize(fv1, 1) + fv1 = tf.reshape(fv1, [-1, self.cluster_size*self.feature_size]) + fv1 = tf.nn.l2_normalize(fv1, 1) + + self.repre = tf.concat([fv1, fv2], 1) + self.repre = tf.layers.dense(self.repre, self.feature_size) + + def build_netvlad(self): + reshaped_input = tf.reshape(self.inputs, [-1, self.feature_size]) + cluster_weights = tf.get_variable("cluster_weights", + [self.feature_size, self.cluster_size], + initializer=NetFV.rand_init(self.feature_size)) + activation = tf.matmul(reshaped_input, cluster_weights) + if self.add_batch_norm: + activation = slim.batch_norm(activation, + center=True, + scale=True, + is_training=self.is_training, + scope="cluster_bn") + else: + cluster_biases = tf.get_variable("cluster_biases", + [self.cluster_size], + initializer=NetFV.rand_init(self.feature_size)) + activation += cluster_biases + activation = tf.nn.softmax(activation) + activation = tf.reshape(activation, [-1, self.max_frames, self.cluster_size]) + + a_sum = tf.reduce_sum(activation, -2, keep_dims=True) + + cluster_weights2 = tf.get_variable("cluster_weights2", + [1, self.feature_size, self.cluster_size], + initializer=NetFV.rand_init(self.feature_size)) + + a = tf.multiply(a_sum, cluster_weights2) + activation = tf.transpose(activation, perm=[0, 2, 1]) + + reshaped_input = tf.reshape(reshaped_input, + [-1, self.max_frames, self.feature_size]) + vlad = tf.matmul(activation, reshaped_input) + vlad = tf.transpose(vlad, perm=[0, 2, 1]) + vlad = tf.subtract(vlad, a) + + vlad = tf.nn.l2_normalize(vlad, 1) + + vlad = tf.reshape(vlad, [-1, self.cluster_size * self.feature_size]) + vlad = tf.nn.l2_normalize(vlad, 1) + self.repre = vlad + + def build_loss(self): + self.probabilities = tf.layers.dense(self.repre, + self.vocab_size, + activation=tf.nn.tanh) + self.probabilities = tf.layers.dense(self.probabilities, self.vocab_size) + self.probabilities = tf.nn.softmax(self.probabilities) + + self.label = tf.placeholder(tf.int32, [None, self.vocab_size]) + logits = tf.cast(self.label, tf.float32) + if self.use_weights: + logits = logits * self.weights + self.loss = - tf.log(tf.reduce_sum(logits * self.probabilities, axis=1)+1e-9) + self.loss = tf.reduce_mean(self.loss) + self.pred =tf.argmax(self.probabilities, 1) + self.avg_diff = tf.cast(tf.equal(tf.argmax(self.label, 1), self.pred), tf.float32) + self.avg_diff = tf.reduce_mean(self.avg_diff) + + # add 2nd layer labels + if self.use_2nd_label: + self.label_2 = tf.placeholder(tf.int32, [None, self.vocab_size_2]) + logits2 = tf.cast(self.label_2, tf.float32) + + if self.multitask_method is None: + self.probabilities2 = tf.layers.dense(self.repre, + self.vocab_size_2, + activation=tf.nn.tanh) + self.probabilities2 = tf.layers.dense(self.probabilities2, self.vocab_size_2) + self.probabilities2 = tf.nn.softmax(self.probabilities2) + + elif self.multitask_method == 'Attention': + self.x = tf.get_variable('emb', + shape=[self.vocab_size, self.feature_size], + dtype=tf.float32, + initializer=NetFV.rand_init(self.feature_size)) + self.emb_label = tf.matmul(self.probabilities, self.x) + self.emb_concat = tf.concat([self.emb_label, self.repre], axis=1) + self.probabilities2 = tf.layers.dense(self.emb_concat, + self.vocab_size_2, + activation=tf.nn.tanh) + self.probabilities2 = tf.layers.dense(self.probabilities2, + self.vocab_size_2) + self.probabilities2 = tf.nn.softmax(self.probabilities2) + + self.loss += tf.reduce_mean(-tf.log( + tf.reduce_sum(logits2*self.probabilities2, axis=1)+1e-9)) + self.pred2 = tf.argmax(self.probabilities2, 1) + self.avg_diff2 = tf.cast(tf.equal(tf.argmax(self.label_2, 1), self.pred2), tf.float32) + self.avg_diff2 = tf.reduce_mean(self.avg_diff2) + + self.optimizer = tf.train.AdamOptimizer(learning_rate=0.0005, + epsilon=1e-08, + name='adam') + self.train_op = slim.learning.create_train_op(self.loss, self.optimizer) + self.eval_res = {'loss': self.loss, 'avg_diff': self.avg_diff} + if self.use_2nd_label: + self.eval_res['avg_diff2'] = self.avg_diff2 + From 679915336a2d3d99041844717723e8a06dae5899 Mon Sep 17 00:00:00 2001 From: Larry Yan Date: Wed, 31 Jul 2019 21:16:55 +0800 Subject: [PATCH 2/9] fix(encoder): add netvlad and netfv register class --- gnes/encoder/video/incep_mixture.py | 56 +++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/gnes/encoder/video/incep_mixture.py b/gnes/encoder/video/incep_mixture.py index 052960b3..d3461b7d 100644 --- a/gnes/encoder/video/incep_mixture.py +++ b/gnes/encoder/video/incep_mixture.py @@ -18,11 +18,11 @@ import numpy as np from PIL import Image -from ..base import BaseVideoEncoder -from ...helper import batching, batch_iterator, get_first_available_gpu +from gnes.encoder.base import BaseImageEncoder +from gnes.helper import batching, batch_iterator, get_first_available_gpu -class IncepMixtureEncoder(BaseVideoEncoder): +class IncepMixtureEncoder(BaseImageEncoder): def __init__(self, model_dir_inception: str, model_dir_mixture: str, @@ -32,9 +32,11 @@ def __init__(self, model_dir_inception: str, feature_size: int = 300, vocab_size: int = 28, cluster_size: int = 256, - method: str = 'netvlad', + method: str = 'fvnet', input_size: int = 1536, - multitask_method: str = 'Attention' + vocab_size_2: int = 174, + max_frames: int = 30, + multitask_method: str = 'Attention', *args, **kwargs): super().__init__(*args, **kwargs) self.model_dir_inception = model_dir_inception @@ -48,12 +50,16 @@ def __init__(self, model_dir_inception: str, self.method = method self.input_size = input_size self.multitask_method = multitask_method + self.inception_size_x = 299 + self.inception_size_y = 299 + self.max_frames = max_frames + self.vocab_size_2 = vocab_size_2 def post_init(self): import tensorflow as tf - from ..image.inception_cores.inception_v4 import inception_v4 - from ..image.inception_cores.inception_utils import inception_arg_scope - from .mixture_core.incep_mixture import * + from gnes.encoder.image.inception_cores.inception_v4 import inception_v4 + from gnes.encoder.image.inception_cores.inception_utils import inception_arg_scope + from gnes.encoder.video.mixture_core.model import NetFV import os os.environ['CUDA_VISIBLE_DEVICES'] = str(get_first_available_gpu()) @@ -71,7 +77,7 @@ def post_init(self): dropout_keep_prob=1.0) config = tf.ConfigProto(log_device_placement=False) - if self._use_cuda: + if self.use_cuda: config.gpu_options.allow_growth = True self.sess = tf.Session(config=config) self.saver = tf.train.Saver() @@ -80,7 +86,7 @@ def post_init(self): g2 = tf.Graph() with g2.as_default(): config = tf.ConfigProto(log_device_placement=False) - if self._use_cuda: + if self.use_cuda: config.gpu_options.allow_growth = True self.sess2 = tf.Session(config=config) self.mix_model = NetFV(feature_size=self.feature_size, @@ -88,9 +94,39 @@ def post_init(self): vocab_size=self.vocab_size, input_size=self.input_size, use_2nd_label=True, + vocab_size_2=self.vocab_size_2, multitask_method=self.multitask_method, method=self.method, is_training=False) saver = tf.train.Saver(max_to_keep=1) self.sess2.run(tf.global_variables_initializer()) saver.restore(self.sess2, self.model_dir_mixture) + + @batching + def encode(self, videos: List['np.ndarray'], *args, **kwargs) -> np.ndarray: + ret = [] + v_len = [len(v) for v in videos] + pos_start = [0] + [sum(v_len[:i]) for i in range(1, len(v_len)-1)] + pos_end = [sum(v_len[:i]) for i in range(1, len(v_len))] + max_len = min(max(v_len), self.max_frames) + + img = [im for v in videos for im in v] + img = [(np.array(Image.fromarray(im).resize((self.inception_size_x, + self.inception_size_y)), dtype=np.float32) * 2 / 255. - 1.) for im + in img] + for _im in batch_iterator(img, self.batch_size): + _, end_points_ = self.sess.run((self.logits, self.end_points), + feed_dict={self.inputs: _im}) + ret.append(end_points_[self.select_layer]) + v = [_ for vi in ret for _ in vi] + + v_input = [v[s:e] for s, e in zip(pos_start, pos_end)] + v_input = [(vi + [[0.0]*self.input_size]*(max_len-len(vi)))[:max_len] for vi in v_input] + v_input = [np.array(vi, dtype=np.float32) for vi in v_input] + + ret = [] + for _vi in batch_iterator(v_input, self.batch_size): + repre = self.sess2.run(self.mix_model.repre, + feed_dict={self.mix_model.feeds: v_input}) + ret.append(repre) + return np.concatenate(ret, axis=1).astype(np.float32) From 3fdf1c06e302a5c3e32d28de431e076802ff5c9e Mon Sep 17 00:00:00 2001 From: Larry Yan Date: Wed, 31 Jul 2019 21:43:52 +0800 Subject: [PATCH 3/9] fix(encoder): fix mixture --- gnes/encoder/__init__.py | 4 ++- gnes/encoder/base.py | 6 +++++ gnes/preprocessor/__init__.py | 1 + gnes/preprocessor/video/ffmpeg.py | 42 +++++++++++++++++++++++++++++++ 4 files changed, 52 insertions(+), 1 deletion(-) diff --git a/gnes/encoder/__init__.py b/gnes/encoder/__init__.py index e77f2411..94dbeefe 100644 --- a/gnes/encoder/__init__.py +++ b/gnes/encoder/__init__.py @@ -34,13 +34,15 @@ 'BaseEncoder': 'base', 'BaseBinaryEncoder': 'base', 'BaseTextEncoder': 'base', + 'BaseVideoEncoder': 'base', 'BaseNumericEncoder': 'base', 'CompositionalEncoder': 'base', 'PipelineEncoder': 'base', 'HashEncoder': 'numeric.hash', 'BasePytorchEncoder': 'image.base', 'TFInceptionEncoder': 'image.inception', - 'CVAEEncoder': 'image.cvae' + 'CVAEEncoder': 'image.cvae', + 'IncepMixtureEncoder': 'video.incep_mixture' } register_all_class(_cls2file_map, 'encoder') diff --git a/gnes/encoder/base.py b/gnes/encoder/base.py index 964e237e..24f15f3a 100644 --- a/gnes/encoder/base.py +++ b/gnes/encoder/base.py @@ -38,6 +38,12 @@ def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray: pass +class BaseVideoEncoder(BaseEncoder): + + def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray: + pass + + class BaseTextEncoder(BaseEncoder): def encode(self, text: List[str], *args, **kwargs) -> np.ndarray: diff --git a/gnes/preprocessor/__init__.py b/gnes/preprocessor/__init__.py index 6764e9b0..b248de3d 100644 --- a/gnes/preprocessor/__init__.py +++ b/gnes/preprocessor/__init__.py @@ -30,6 +30,7 @@ 'BaseUnaryPreprocessor': 'base', 'BaseVideoPreprocessor': 'video.base', 'FFmpegPreprocessor': 'video.ffmpeg', + 'FFmpegVideoSegmentor': 'video.ffmpeg', 'ShotDetectPreprocessor': 'video.shotdetect', } diff --git a/gnes/preprocessor/video/ffmpeg.py b/gnes/preprocessor/video/ffmpeg.py index c21439b9..70a74dd5 100644 --- a/gnes/preprocessor/video/ffmpeg.py +++ b/gnes/preprocessor/video/ffmpeg.py @@ -106,3 +106,45 @@ def duplicate_rm_hash(self, ret.append((i, h)) return [images[_[0]] for _ in ret] + + +class FFmpegVideoSegmentor(BaseVideoPreprocessor): + def __init__(self, + frame_size: str = "192*168", + segment_method: str = 'uniform', + segment_interval: int = -1, + *args, + **kwargs): + super().__init__(*args, **kwargs) + self.frame_size = frame_size + self.segment_method = segment_method + self.segment_interval = segment_interval + self._ffmpeg_kwargs = kwargs + + def apply(self, doc: 'gnes_pb2.Document') -> None: + super().apply(doc) + if doc.raw_bytes: + frames = get_video_frames( + doc.raw_bytes, + s=self.frame_size, + vsync=self._ffmpeg_kwargs.get("vsync", "vfr"), + vf=self._ffmpeg_kwargs.get("vf", "select=eq(pict_type\\,I)")) + + sub_videos = [] + if len(frames) >= 1: + if self.segment_method == 'uniform': + if self.segment_interval == -1: + sub_videos = [frames] + else: + sub_videos = [frames[_: _+self.segment_interval] + for _ in range(0, len(frames), self.segment_interval)] + for ci, chunk in enumerate(sub_videos): + c = doc.chunks.add() + c.doc_id = doc.doc_id + c.blob.CopyFrom(array2blob(np.array(chunk, dtype=np.uint8))) + c.offset_1d = ci + c.weight = 1 / len(sub_videos) + else: + self.logger.info('bad document: no key frames extracted') + else: + self.logger.error('bad document: "raw_bytes" is empty!') From 95f03c56701f4a691f8412490f6dcf9565751da1 Mon Sep 17 00:00:00 2001 From: Larry Yan Date: Thu, 1 Aug 2019 10:32:00 +0800 Subject: [PATCH 4/9] fix(encoder): fix bug in video mixture encoder --- gnes/encoder/video/incep_mixture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnes/encoder/video/incep_mixture.py b/gnes/encoder/video/incep_mixture.py index d3461b7d..357b60cb 100644 --- a/gnes/encoder/video/incep_mixture.py +++ b/gnes/encoder/video/incep_mixture.py @@ -107,7 +107,7 @@ def encode(self, videos: List['np.ndarray'], *args, **kwargs) -> np.ndarray: ret = [] v_len = [len(v) for v in videos] pos_start = [0] + [sum(v_len[:i]) for i in range(1, len(v_len)-1)] - pos_end = [sum(v_len[:i]) for i in range(1, len(v_len))] + pos_end = [sum(v_len[:i]) for i in range(len(v_len))] max_len = min(max(v_len), self.max_frames) img = [im for v in videos for im in v] From 17779676bd452b2679ba7e64918721cf85e0bec1 Mon Sep 17 00:00:00 2001 From: Larry Yan Date: Thu, 1 Aug 2019 10:44:06 +0800 Subject: [PATCH 5/9] fix(encoder): fix mixture encoder --- gnes/encoder/video/incep_mixture.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gnes/encoder/video/incep_mixture.py b/gnes/encoder/video/incep_mixture.py index 357b60cb..eac796e4 100644 --- a/gnes/encoder/video/incep_mixture.py +++ b/gnes/encoder/video/incep_mixture.py @@ -106,8 +106,8 @@ def post_init(self): def encode(self, videos: List['np.ndarray'], *args, **kwargs) -> np.ndarray: ret = [] v_len = [len(v) for v in videos] - pos_start = [0] + [sum(v_len[:i]) for i in range(1, len(v_len)-1)] - pos_end = [sum(v_len[:i]) for i in range(len(v_len))] + pos_start = [0] + [sum(v_len[:i+1]) for i in range(len(v_len)-1)] + pos_end = [sum(v_len[:i+1]) for i in range(len(v_len))] max_len = min(max(v_len), self.max_frames) img = [im for v in videos for im in v] From d6a46fa6b6a931b4dc0e334f5a89bb66460a84b0 Mon Sep 17 00:00:00 2001 From: Larry Yan Date: Thu, 1 Aug 2019 11:32:37 +0800 Subject: [PATCH 6/9] fix(encoder): fix import path for mixture encoder --- gnes/encoder/video/incep_mixture.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gnes/encoder/video/incep_mixture.py b/gnes/encoder/video/incep_mixture.py index eac796e4..60a7710d 100644 --- a/gnes/encoder/video/incep_mixture.py +++ b/gnes/encoder/video/incep_mixture.py @@ -18,11 +18,11 @@ import numpy as np from PIL import Image -from gnes.encoder.base import BaseImageEncoder -from gnes.helper import batching, batch_iterator, get_first_available_gpu +from ..base import BaseVideoEncoder +from ...helper import batching, batch_iterator, get_first_available_gpu -class IncepMixtureEncoder(BaseImageEncoder): +class IncepMixtureEncoder(BaseVideoEncoder): def __init__(self, model_dir_inception: str, model_dir_mixture: str, @@ -57,9 +57,9 @@ def __init__(self, model_dir_inception: str, def post_init(self): import tensorflow as tf - from gnes.encoder.image.inception_cores.inception_v4 import inception_v4 - from gnes.encoder.image.inception_cores.inception_utils import inception_arg_scope - from gnes.encoder.video.mixture_core.model import NetFV + from ..image.inception_cores.inception_v4 import inception_v4 + from ..image.inception_cores.inception_utils import inception_arg_scope + from .mixture_core.model import NetFV import os os.environ['CUDA_VISIBLE_DEVICES'] = str(get_first_available_gpu()) From 5b0fe7c6c3c5f2fa433c090fdad9c8c80d59f4df Mon Sep 17 00:00:00 2001 From: Larry Yan Date: Thu, 1 Aug 2019 14:15:59 +0800 Subject: [PATCH 7/9] fix(preprocessor): fix FFmpegVideoSegmentor --- gnes/preprocessor/video/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnes/preprocessor/video/ffmpeg.py b/gnes/preprocessor/video/ffmpeg.py index 70a74dd5..30dca7a1 100644 --- a/gnes/preprocessor/video/ffmpeg.py +++ b/gnes/preprocessor/video/ffmpeg.py @@ -110,7 +110,7 @@ def duplicate_rm_hash(self, class FFmpegVideoSegmentor(BaseVideoPreprocessor): def __init__(self, - frame_size: str = "192*168", + frame_size: str = "299*299", segment_method: str = 'uniform', segment_interval: int = -1, *args, From 15eb50b4d1a85e3b3ddd8ceeb7e3fd0b2b45b428 Mon Sep 17 00:00:00 2001 From: Larry Yan Date: Thu, 1 Aug 2019 14:29:30 +0800 Subject: [PATCH 8/9] fix(encoder): fix params in basevideo encoder --- gnes/encoder/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnes/encoder/base.py b/gnes/encoder/base.py index 24f15f3a..6fb45b7c 100644 --- a/gnes/encoder/base.py +++ b/gnes/encoder/base.py @@ -40,7 +40,7 @@ def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray: class BaseVideoEncoder(BaseEncoder): - def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray: + def encode(self, videos: List['np.ndarray'], *args, **kwargs) -> np.ndarray: pass From 46b5c94eea4973e3f1b2dcb05e85981f316a4ca3 Mon Sep 17 00:00:00 2001 From: Larry Yan Date: Thu, 1 Aug 2019 14:52:31 +0800 Subject: [PATCH 9/9] fix(encoder): fix name for video encoder --- gnes/encoder/base.py | 2 +- gnes/encoder/video/incep_mixture.py | 6 +++--- gnes/helper.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gnes/encoder/base.py b/gnes/encoder/base.py index 6fb45b7c..d21ad3aa 100644 --- a/gnes/encoder/base.py +++ b/gnes/encoder/base.py @@ -40,7 +40,7 @@ def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray: class BaseVideoEncoder(BaseEncoder): - def encode(self, videos: List['np.ndarray'], *args, **kwargs) -> np.ndarray: + def encode(self, data: List['np.ndarray'], *args, **kwargs) -> np.ndarray: pass diff --git a/gnes/encoder/video/incep_mixture.py b/gnes/encoder/video/incep_mixture.py index 60a7710d..2c5a8a3f 100644 --- a/gnes/encoder/video/incep_mixture.py +++ b/gnes/encoder/video/incep_mixture.py @@ -103,14 +103,14 @@ def post_init(self): saver.restore(self.sess2, self.model_dir_mixture) @batching - def encode(self, videos: List['np.ndarray'], *args, **kwargs) -> np.ndarray: + def encode(self, data: List['np.ndarray'], *args, **kwargs) -> np.ndarray: ret = [] - v_len = [len(v) for v in videos] + v_len = [len(v) for v in data] pos_start = [0] + [sum(v_len[:i+1]) for i in range(len(v_len)-1)] pos_end = [sum(v_len[:i+1]) for i in range(len(v_len))] max_len = min(max(v_len), self.max_frames) - img = [im for v in videos for im in v] + img = [im for v in data for im in v] img = [(np.array(Image.fromarray(im).resize((self.inception_size_x, self.inception_size_y)), dtype=np.float32) * 2 / 255. - 1.) for im in img] diff --git a/gnes/helper.py b/gnes/helper.py index 9758aa02..7e69b0bf 100644 --- a/gnes/helper.py +++ b/gnes/helper.py @@ -48,8 +48,8 @@ def get_first_available_gpu(): try: import GPUtil r = GPUtil.getAvailable(order='random', - maxMemory=0.1, - maxLoad=0.1, + maxMemory=0.5, + maxLoad=0.5, limit=1) if r: return r[0]