Skip to content
This repository has been archived by the owner on Feb 22, 2020. It is now read-only.

Commit

Permalink
Merge pull request #66 from gnes-ai/v2v
Browse files Browse the repository at this point in the history
fix(encoder): add the code for netfv netvlad dbog and video segmentor
  • Loading branch information
Han Xiao authored Aug 1, 2019
2 parents 8150cf1 + 46b5c94 commit 323879a
Show file tree
Hide file tree
Showing 8 changed files with 431 additions and 3 deletions.
4 changes: 3 additions & 1 deletion gnes/encoder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,15 @@
'BaseEncoder': 'base',
'BaseBinaryEncoder': 'base',
'BaseTextEncoder': 'base',
'BaseVideoEncoder': 'base',
'BaseNumericEncoder': 'base',
'CompositionalEncoder': 'base',
'PipelineEncoder': 'base',
'HashEncoder': 'numeric.hash',
'BasePytorchEncoder': 'image.base',
'TFInceptionEncoder': 'image.inception',
'CVAEEncoder': 'image.cvae'
'CVAEEncoder': 'image.cvae',
'IncepMixtureEncoder': 'video.incep_mixture'
}

register_all_class(_cls2file_map, 'encoder')
6 changes: 6 additions & 0 deletions gnes/encoder/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
pass


class BaseVideoEncoder(BaseEncoder):
    """Common base for encoders whose ``encode`` consumes a list of arrays.

    NOTE(review): each list item is presumably the frames of one video --
    confirm against the concrete subclasses.
    """

    def encode(self, data: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
        """Placeholder to be overridden; this stub does nothing and returns ``None``."""


class BaseTextEncoder(BaseEncoder):

def encode(self, text: List[str], *args, **kwargs) -> np.ndarray:
Expand Down
132 changes: 132 additions & 0 deletions gnes/encoder/video/incep_mixture.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# Tencent is pleased to support the open source community by making GNES available.
#
# Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List

import numpy as np
from PIL import Image

from ..base import BaseVideoEncoder
from ...helper import batching, batch_iterator, get_first_available_gpu


class IncepMixtureEncoder(BaseVideoEncoder):
    """Two-stage video encoder: Inception-v4 per frame, then a ``NetFV`` mixture model.

    Every frame is resized and run through Inception-v4; the activations of
    ``select_layer`` are regrouped per video, padded or truncated to a common
    number of frames and fed to the mixture model, whose ``repre`` output is
    returned.
    """

    def __init__(self, model_dir_inception: str,
                 model_dir_mixture: str,
                 batch_size: int = 64,
                 select_layer: str = 'PreLogitsFlatten',
                 use_cuda: bool = False,
                 feature_size: int = 300,
                 vocab_size: int = 28,
                 cluster_size: int = 256,
                 method: str = 'fvnet',
                 input_size: int = 1536,
                 vocab_size_2: int = 174,
                 max_frames: int = 30,
                 multitask_method: str = 'Attention',
                 *args, **kwargs):
        """Store the configuration; the TensorFlow models are built in ``post_init``.

        :param model_dir_inception: checkpoint path restored into the Inception-v4 session
        :param model_dir_mixture: checkpoint path restored into the mixture-model session
        :param batch_size: batch size used for both the frame stage and the video stage
        :param select_layer: key of the Inception ``end_points`` dict used as frame feature
        :param use_cuda: when true, both sessions are created with GPU ``allow_growth``
        :param feature_size: forwarded to ``NetFV`` as ``feature_size``
        :param vocab_size: forwarded to ``NetFV`` as ``vocab_size``
        :param cluster_size: forwarded to ``NetFV`` as ``cluster_size``
        :param method: forwarded to ``NetFV`` as ``method``
        :param input_size: forwarded to ``NetFV`` as ``input_size``; also the width of
            the zero rows used to pad short videos
        :param vocab_size_2: forwarded to ``NetFV`` as ``vocab_size_2``
        :param max_frames: upper bound on the number of frames per video fed to the
            mixture model
        :param multitask_method: forwarded to ``NetFV`` as ``multitask_method``
        """
        super().__init__(*args, **kwargs)
        self.model_dir_inception = model_dir_inception
        self.model_dir_mixture = model_dir_mixture
        self.batch_size = batch_size
        self.select_layer = select_layer
        self.use_cuda = use_cuda
        self.cluster_size = cluster_size
        self.feature_size = feature_size
        self.vocab_size = vocab_size
        self.method = method
        self.input_size = input_size
        self.multitask_method = multitask_method
        # Frames are resized to this fixed resolution before entering Inception-v4.
        self.inception_size_x = 299
        self.inception_size_y = 299
        self.max_frames = max_frames
        self.vocab_size_2 = vocab_size_2

    def post_init(self):
        """Build both TensorFlow graphs, open their sessions and restore the checkpoints."""
        import tensorflow as tf
        from ..image.inception_cores.inception_v4 import inception_v4
        from ..image.inception_cores.inception_utils import inception_arg_scope
        from .mixture_core.model import NetFV
        import os
        # Set unconditionally, i.e. also when ``use_cuda`` is false.
        os.environ['CUDA_VISIBLE_DEVICES'] = str(get_first_available_gpu())

        # Stage 1: Inception-v4 frame feature extractor in its own graph.
        g = tf.Graph()
        with g.as_default():
            arg_scope = inception_arg_scope()
            inception_v4.default_image_size = self.inception_size_x
            self.inputs = tf.placeholder(tf.float32, (None,
                                                      self.inception_size_x,
                                                      self.inception_size_y, 3))

            with tf.contrib.slim.arg_scope(arg_scope):
                self.logits, self.end_points = inception_v4(self.inputs,
                                                            is_training=False,
                                                            dropout_keep_prob=1.0)

            config = tf.ConfigProto(log_device_placement=False)
            if self.use_cuda:
                config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=config)
            self.saver = tf.train.Saver()
            self.saver.restore(self.sess, self.model_dir_inception)

        # Stage 2: mixture model in a separate graph/session so that the two
        # checkpoints are restored independently of each other.
        g2 = tf.Graph()
        with g2.as_default():
            config = tf.ConfigProto(log_device_placement=False)
            if self.use_cuda:
                config.gpu_options.allow_growth = True
            self.sess2 = tf.Session(config=config)
            self.mix_model = NetFV(feature_size=self.feature_size,
                                   cluster_size=self.cluster_size,
                                   vocab_size=self.vocab_size,
                                   input_size=self.input_size,
                                   use_2nd_label=True,
                                   vocab_size_2=self.vocab_size_2,
                                   multitask_method=self.multitask_method,
                                   method=self.method,
                                   is_training=False)
            saver = tf.train.Saver(max_to_keep=1)
            self.sess2.run(tf.global_variables_initializer())
            saver.restore(self.sess2, self.model_dir_mixture)

    @batching
    def encode(self, data: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
        """Encode videos into the mixture model's ``repre`` output.

        :param data: one entry per video, each an iterable of frames accepted by
            ``PIL.Image.fromarray``
        :return: float32 array with the per-batch ``repre`` outputs stacked along
            axis 0 (assumes ``repre`` has one row per input video -- TODO confirm
            against ``NetFV``)
        :raises ValueError: if ``data`` is empty
        """
        v_len = [len(v) for v in data]
        # Running totals give every video's [start, end) slice inside the
        # flattened frame list; one pass instead of re-summing each prefix.
        pos_end = np.cumsum(v_len).tolist()
        pos_start = [0] + pos_end[:-1]
        max_len = min(max(v_len), self.max_frames)

        # Flatten all frames, resize them and rescale pixel values to [-1, 1].
        target_size = (self.inception_size_x, self.inception_size_y)
        img = [np.array(Image.fromarray(im).resize(target_size),
                        dtype=np.float32) * 2 / 255. - 1.
               for v in data for im in v]

        # Stage 1: collect one feature vector per frame, in input order.
        frame_features = []
        for _im in batch_iterator(img, self.batch_size):
            _, end_points_ = self.sess.run((self.logits, self.end_points),
                                           feed_dict={self.inputs: _im})
            frame_features.extend(end_points_[self.select_layer])

        # Regroup frames per video, zero-pad short videos and truncate long
        # ones so that every video has exactly ``max_len`` rows.
        pad_row = [0.0] * self.input_size
        v_input = []
        for s, e in zip(pos_start, pos_end):
            vi = frame_features[s:e]
            vi = (vi + [pad_row] * (max_len - len(vi)))[:max_len]
            v_input.append(np.array(vi, dtype=np.float32))

        # Stage 2: feed only the current batch (not the whole input) and stack
        # the per-batch results row-wise so each video appears exactly once.
        ret = []
        for _vi in batch_iterator(v_input, self.batch_size):
            repre = self.sess2.run(self.mix_model.repre,
                                   feed_dict={self.mix_model.feeds: _vi})
            ret.append(repre)
        return np.concatenate(ret, axis=0).astype(np.float32)
Empty file.
Loading

0 comments on commit 323879a

Please sign in to comment.