Skip to content
This repository has been archived by the owner on Feb 22, 2020. It is now read-only.

Commit

Permalink
Merge branch 'master' into v2v
Browse files: browse the repository at this point in the history
  • Loading branch information
Larryjianfeng committed Aug 5, 2019
2 parents 7c16fb8 + 20e5723 commit 1c875da
Show file tree
Hide file tree
Showing 22 changed files with 99 additions and 113 deletions.
4 changes: 2 additions & 2 deletions gnes/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ def __call__(cls, *args, **kwargs):

obj = type.__call__(cls, *args, **kwargs)

# set attribute
# set attribute with priority
# gnes_config in YAML > class attribute > default_gnes_config
for k, v in TrainableType.default_gnes_config.items():
if k in gnes_config:
v = gnes_config[k]
Expand Down Expand Up @@ -163,7 +164,6 @@ class TrainableBase(metaclass=TrainableType):
store_args_kwargs = False

def __init__(self, *args, **kwargs):
self.is_trained = False
self.verbose = 'verbose' in kwargs and kwargs['verbose']
self.logger = set_logger(self.__class__.__name__, self.verbose)
self._post_init_vars = set()
Expand Down
4 changes: 1 addition & 3 deletions gnes/cli/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,7 @@ def set_composer_flask_parser(parser=None):
parser = set_base_parser()
set_composer_parser(parser)
parser.add_argument('--flask', action='store_true', default=False,
help='using Flask to serve GNES composer in interactive mode')
parser.add_argument('--cors', type=str, default='*',
help='setting "Access-Control-Allow-Origin" for HTTP requests')
help='using Flask to serve a composer in interactive mode, aka GNES board')
parser.add_argument('--http_port', type=int, default=8080,
help='server port for receiving HTTP requests')
return parser
Expand Down
10 changes: 7 additions & 3 deletions gnes/composer/flask.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,22 @@ def _create_flask_app(self):
try:
from flask import Flask, request
from flask_compress import Compress
from flask_cors import CORS
except ImportError:
raise ImportError('Flask or its dependencies are not fully installed, '
'they are required for serving HTTP requests.'
'Please use "pip install gnes[http]" to install it.')

# support up to 10 concurrent HTTP requests
app = Flask(__name__)
args = set_composer_parser().parse_args([])
default_html = YamlComposer(args).build_all()['html']

@app.errorhandler(500)
def exception_handler(error):
self.logger.error('unhandled error, i better quit and restart! %s' % error)
return '<h1>500 Internal Error</h1> ' \
'While we are fixing the issue, do you know you can deploy GNES board locally on your machine? ' \
'Simply run <pre>docker run -d -p 0.0.0.0:80:8080/tcp gnes/gnes compose --flask</pre>', 500

@app.route('/', methods=['GET'])
def _get_homepage():
return default_html
Expand All @@ -59,7 +64,6 @@ def _regenerate():
self.logger.error(e)
return '<h1>Bad YAML input</h1> please kindly check the format, indent and content of your YAML file!', 400

CORS(app, origins=self.args.cors)
Compress().init_app(app)
return app

Expand Down
3 changes: 1 addition & 2 deletions gnes/encoder/image/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,15 @@


class BasePytorchEncoder(BaseImageEncoder):
batch_size = 64

def __init__(self, model_name: str,
layers: List[str],
model_dir: str,
batch_size: int = 64,
use_cuda: bool = False,
*args, **kwargs):
super().__init__(*args, **kwargs)

self.batch_size = batch_size
self.model_dir = model_dir
self.model_name = model_name
self.layers = layers
Expand Down
23 changes: 12 additions & 11 deletions gnes/encoder/image/cvae.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,13 @@
from PIL import Image

from ..base import BaseImageEncoder
from ...helper import batch_iterator


class CVAEEncoder(BaseImageEncoder):
batch_size = 64

def __init__(self, model_dir: str,
latent_dim: int = 300,
batch_size: int = 64,
select_method: str = 'MEAN',
l2_normalize: bool = False,
use_gpu: bool = True,
Expand All @@ -35,7 +34,6 @@ def __init__(self, model_dir: str,

self.model_dir = model_dir
self.latent_dim = latent_dim
self.batch_size = batch_size
self.select_method = select_method
self.l2_normalize = l2_normalize
self.use_gpu = use_gpu
Expand All @@ -59,19 +57,22 @@ def post_init(self):
self.saver.restore(self.sess, self.model_dir)

def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
ret = []
img = [(np.array(Image.fromarray(im).resize((120, 120)),
dtype=np.float32) / 255) for im in img]
for _im in batch_iterator(img, self.batch_size):

def _encode(_, data):
_mean, _var = self.sess.run((self.mean, self.var),
feed_dict={self.inputs: _im})
feed_dict={self.inputs: data})
if self.select_method == 'MEAN':
ret.append(_mean)
return _mean
elif self.select_method == 'VAR':
ret.append(_var)
return _var
elif self.select_method == 'MEAN_VAR':
ret.append(np.concatenate([_mean, _var]), axis=1)
v = np.concatenate(ret, axis=0).astype(np.float32)
return np.concatenate([_mean, _var], axis=1)
else:
raise NotImplementedError

v = _encode(None, img).astype(np.float32)
if self.l2_normalize:
v = v / (v**2).sum(axis=1, keepdims=True)**0.5
v = v / (v ** 2).sum(axis=1, keepdims=True) ** 0.5
return v
18 changes: 9 additions & 9 deletions gnes/encoder/image/inception.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,19 @@
from PIL import Image

from ..base import BaseImageEncoder
from ...helper import batching, batch_iterator, get_first_available_gpu
from ...helper import batching, get_first_available_gpu


class TFInceptionEncoder(BaseImageEncoder):
batch_size = 64

def __init__(self, model_dir: str,
batch_size: int = 64,
select_layer: str = 'PreLogitsFlatten',
use_cuda: bool = False,
*args, **kwargs):
super().__init__(*args, **kwargs)

self.model_dir = model_dir
self.batch_size = batch_size
self.select_layer = select_layer
self._use_cuda = use_cuda
self.inception_size_x = 299
Expand Down Expand Up @@ -64,14 +63,15 @@ def post_init(self):
self.saver = tf.train.Saver()
self.saver.restore(self.sess, self.model_dir)

@batching
def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
ret = []
img = [(np.array(Image.fromarray(im).resize((self.inception_size_x,
self.inception_size_y)), dtype=np.float32) * 2 / 255. - 1.) for im
in img]
for _im in batch_iterator(img, self.batch_size):

@batching
def _encode(_, data):
_, end_points_ = self.sess.run((self.logits, self.end_points),
feed_dict={self.inputs: _im})
ret.append(end_points_[self.select_layer])
return np.concatenate(ret, axis=0).astype(np.float32)
feed_dict={self.inputs: data})
return end_points_[self.select_layer]

return _encode(None, img).astype(np.float32)
4 changes: 3 additions & 1 deletion gnes/encoder/numeric/hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@


class HashEncoder(BaseNumericEncoder):
batch_size = 2048

def __init__(self, num_bytes: int,
num_bits: int = 8,
num_idx: int = 3,
Expand Down Expand Up @@ -105,7 +107,7 @@ def hash(self, vecs):
return np.concatenate(ret, axis=1).astype(np.uint32)

@train_required
@batching(batch_size=2048)
@batching
def encode(self, vecs: np.ndarray, *args, **kwargs) -> np.ndarray:
if vecs.shape[1] != self.vec_dim:
raise ValueError('input dimension error')
Expand Down
3 changes: 2 additions & 1 deletion gnes/encoder/numeric/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@


class PCALocalEncoder(BaseNumericEncoder):
batch_size = 2048

def __init__(self, output_dim: int, num_locals: int,
*args, **kwargs):
super().__init__(*args, **kwargs)
Expand All @@ -32,7 +34,6 @@ def __init__(self, output_dim: int, num_locals: int,
self.num_locals = num_locals
self.pca_components = None
self.mean = None
self.batch_size = 2048

@batching(batch_size=get_optimal_sample_size, num_batch=1)
def train(self, vecs: np.ndarray, *args, **kwargs) -> None:
Expand Down
4 changes: 3 additions & 1 deletion gnes/encoder/numeric/pq.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@


class PQEncoder(BaseBinaryEncoder):
batch_size = 2048

def __init__(self, num_bytes: int, cluster_per_byte: int = 255, *args, **kwargs):
super().__init__(*args, **kwargs)
assert 1 < cluster_per_byte <= 255, 'cluster number should >1 and <= 255 (0 is reserved for NOP)'
Expand All @@ -49,7 +51,7 @@ def train(self, vecs: np.ndarray, *args, **kwargs):
dim_per_byte])

@train_required
@batching(batch_size=2048)
@batching
def encode(self, vecs: np.ndarray, *args, **kwargs) -> np.ndarray:
dim_per_byte = self._get_dim_per_byte(vecs)

Expand Down
6 changes: 2 additions & 4 deletions gnes/encoder/numeric/tf_pq.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,13 @@


class TFPQEncoder(PQEncoder):
batch_size = 8192

@classmethod
def pre_init(cls):
import os
os.environ['CUDA_VISIBLE_DEVICES'] = str(get_first_available_gpu())

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.batch_size = 8192

def post_init(self):
import tensorflow as tf
self._graph = self._get_graph()
Expand Down
4 changes: 2 additions & 2 deletions gnes/encoder/text/bert.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@

class BertEncoder(BaseTextEncoder):
store_args_kwargs = True
is_trained = True

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.is_trained = True
self._bc_encoder_args = args
self._bc_encoder_kwargs = kwargs

Expand All @@ -52,6 +52,7 @@ def encode(self, text: List[str], *args, **kwargs) -> np.ndarray:

class BertEncoderServer(BaseTextEncoder):
store_args_kwargs = True
is_trained = True

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand All @@ -60,7 +61,6 @@ def __init__(self, *args, **kwargs):
bert_args.append('-%s' % k)
bert_args.append(str(v))
self._bert_args = bert_args
self.is_trained = True

def post_init(self):
from bert_serving.server import BertServer
Expand Down
6 changes: 3 additions & 3 deletions gnes/encoder/text/elmo.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,20 @@


class ElmoEncoder(BaseTextEncoder):
is_trained = True
batch_size = 64

def __init__(self, model_dir: str, batch_size: int = 64, pooling_layer: int = -1,
def __init__(self, model_dir: str, pooling_layer: int = -1,
pooling_strategy: str = 'REDUCE_MEAN', *args, **kwargs):
super().__init__(*args, **kwargs)

self.model_dir = model_dir

self.batch_size = batch_size
if pooling_layer > 2:
raise ValueError('pooling_layer = %d is not supported now!' %
pooling_layer)
self.pooling_layer = pooling_layer
self.pooling_strategy = pooling_strategy
self.is_trained = True

def post_init(self):
from elmoformanylangs import Embedder
Expand Down
5 changes: 1 addition & 4 deletions gnes/encoder/text/flair.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,14 @@


class FlairEncoder(BaseTextEncoder):
is_trained = True

def __init__(self, model_name: str = 'multi-forward-fast',
batch_size: int = 64,
pooling_strategy: str = 'REDUCE_MEAN', *args, **kwargs):
super().__init__(*args, **kwargs)

self.model_name = model_name

self.batch_size = batch_size
self.pooling_strategy = pooling_strategy
self.is_trained = True

def post_init(self):
from flair.embeddings import FlairEmbeddings
Expand Down
6 changes: 3 additions & 3 deletions gnes/encoder/text/gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,20 @@


class GPTEncoder(BaseTextEncoder):
is_trained = True
batch_size = 64

def __init__(self,
model_dir: str,
batch_size: int = 64,
use_cuda: bool = False,
pooling_strategy: str = 'REDUCE_MEAN',
*args,
**kwargs):
super().__init__(*args, **kwargs)

self.model_dir = model_dir
self.batch_size = batch_size
self.pooling_strategy = pooling_strategy
self._use_cuda = use_cuda
self.is_trained = True

def post_init(self):
import torch
Expand Down
5 changes: 2 additions & 3 deletions gnes/encoder/text/w2v.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,16 @@


class Word2VecEncoder(BaseTextEncoder):
is_trained = True

def __init__(self, model_dir: str,
skiprows: int = 1,
batch_size: int = 64,
dimension: int = 300,
pooling_strategy: str = 'REDUCE_MEAN', *args, **kwargs):
super().__init__(*args, **kwargs)
self.model_dir = model_dir
self.skiprows = skiprows
self.batch_size = batch_size
self.pooling_strategy = pooling_strategy
self.is_trained = True
self.dimension = dimension

def post_init(self):
Expand Down
Loading

0 comments on commit 1c875da

Please sign in to comment.