diff --git a/gnes/encoder/__init__.py b/gnes/encoder/__init__.py
index 1e6ffa0a..e77f2411 100644
--- a/gnes/encoder/__init__.py
+++ b/gnes/encoder/__init__.py
@@ -40,6 +40,7 @@
     'HashEncoder': 'numeric.hash',
     'BasePytorchEncoder': 'image.base',
     'TFInceptionEncoder': 'image.inception',
+    'CVAEEncoder': 'image.cvae'
 }
 
 register_all_class(_cls2file_map, 'encoder')
diff --git a/gnes/encoder/image/cvae.py b/gnes/encoder/image/cvae.py
index f297e17d..84c3f9dd 100644
--- a/gnes/encoder/image/cvae.py
+++ b/gnes/encoder/image/cvae.py
@@ -28,6 +28,7 @@ def __init__(self, model_dir: str,
                  latent_dim: int = 300,
                  batch_size: int = 64,
                  select_method: str = 'MEAN',
+                 l2_normalize: bool = False,
                  use_gpu: bool = True,
                  *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -36,24 +37,26 @@ def __init__(self, model_dir: str,
         self.latent_dim = latent_dim
         self.batch_size = batch_size
         self.select_method = select_method
+        self.l2_normalize = l2_normalize
         self.use_gpu = use_gpu
 
     def post_init(self):
         import tensorflow as tf
-        from .cave_cores.model import CVAE
+        from .cvae_cores.model import CVAE
+        g = tf.Graph()
+        with g.as_default():
+            self._model = CVAE(self.latent_dim)
+            self.inputs = tf.placeholder(tf.float32,
+                                         (None, 120, 120, 3))
-        self._model = CVAE(self.latent_dim)
-        self.inputs = tf.placeholder(tf.float32,
-                                     (None, 120, 120, 3))
+            self.mean, self.var = self._model.encode(self.inputs)
-        self.mean, self.var = self._model.encode(self.inputs)
-
-        config = tf.ConfigProto(log_device_placement=False)
-        if self.use_gpu:
-            config.gpu_options.allow_growth = True
-        self.sess = tf.Session(config=config)
-        self.saver = tf.train.Saver()
-        self.saver.restore(self.sess, self.model_dir)
+            config = tf.ConfigProto(log_device_placement=False)
+            if self.use_gpu:
+                config.gpu_options.allow_growth = True
+            self.sess = tf.Session(config=config)
+            self.saver = tf.train.Saver()
+            self.saver.restore(self.sess, self.model_dir)
 
     def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
         ret = []
@@ -68,4 +71,7 @@ def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
                 ret.append(_var)
             elif self.select_method == 'MEAN_VAR':
                 ret.append(np.concatenate([_mean, _var]), axis=1)
-        return np.concatenate(ret, axis=0).astype(np.float32)
+        v = np.concatenate(ret, axis=0).astype(np.float32)
+        if self.l2_normalize:
+            v = v / (v**2).sum(axis=1, keepdims=True)**0.5
+        return v
diff --git a/gnes/encoder/image/cvae_cores/__init__.py b/gnes/encoder/image/cvae_cores/__init__.py
new file mode 100644
index 00000000..e69de29b
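The new l2_normalize flag rescales every encoded vector to unit L2 norm before encode() returns it, so dot-product similarity on the output behaves like cosine similarity. A minimal standalone numpy sketch of that rescaling (toy matrix, not GNES code):

import numpy as np

v = np.array([[3., 4., 0.],
              [1., 2., 2.]], dtype=np.float32)
v_norm = v / (v ** 2).sum(axis=1, keepdims=True) ** 0.5   # same expression as in encode()
print(np.linalg.norm(v_norm, axis=1))                     # -> [1. 1.] (up to float precision)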
diff --git a/gnes/indexer/fulltext/leveldb.py b/gnes/indexer/fulltext/leveldb.py
index 7d15d42b..2a4bf3fa 100644
--- a/gnes/indexer/fulltext/leveldb.py
+++ b/gnes/indexer/fulltext/leveldb.py
@@ -26,10 +26,16 @@
 class LVDBIndexer(BaseTextIndexer):
 
-    def __init__(self, data_path: str, keep_na_doc: bool = True, *args, **kwargs):
+    def __init__(self, data_path: str,
+                 keep_na_doc: bool = True,
+                 drop_raw_bytes: bool = False,
+                 drop_chunk_blob: bool = False,
+                 *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.data_path = data_path
         self.keep_na_doc = keep_na_doc
+        self.drop_raw_bytes = drop_raw_bytes
+        self.drop_chunk_blob = drop_chunk_blob
         self._NOT_FOUND = None
 
     def post_init(self):
@@ -40,6 +46,11 @@ def add(self, keys: List[int], docs: List['gnes_pb2.Document'], *args, **kwargs)
         with self._db.write_batch() as wb:
             for k, d in zip(keys, docs):
                 doc_id = pickle.dumps(k)
+                if self.drop_raw_bytes:
+                    d.raw_bytes = b''
+                if self.drop_chunk_blob:
+                    for i in range(len(d.chunks)):
+                        d.chunks[i].ClearField('blob')
                 doc = d.SerializeToString()
                 wb.put(doc_id, doc)
diff --git a/gnes/preprocessor/image/base.py b/gnes/preprocessor/image/base.py
index e2e72d56..a557cbe9 100644
--- a/gnes/preprocessor/image/base.py
+++ b/gnes/preprocessor/image/base.py
@@ -32,8 +32,18 @@ def __init__(self,
     def _get_all_chunks_weight(self, image_set: List['np.ndarray']) -> List[float]:
         pass
 
-    @classmethod
-    def _torch_transform(cls, image):
+    @staticmethod
+    def _torch_transform(image):
         import torchvision.transforms as transforms
         return transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize(mean=(0.5, 0.5, 0.5),
                                                         std=(0.5, 0.5, 0.5))])(image)
+
+    @staticmethod
+    def _get_all_subarea(image):
+        from itertools import product
+        x_list = [0, image.size[0] / 3, 2 * image.size[0] / 3, image.size[0]]
+        y_list = [0, image.size[1] / 3, 2 * image.size[1] / 3, image.size[1]]
+
+        index = [[x, y, x + 1, y + 1] for [x, y] in product(range(len(x_list) - 1), range(len(y_list) - 1))]
+        all_subareas = [[x_list[idx[0]], y_list[idx[1]], x_list[idx[2]], y_list[idx[3]]] for idx in index]
+        return all_subareas, index
\ No newline at end of file
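The new _get_all_subarea helper splits an image into a fixed 3x3 grid and returns both the pixel boxes and their grid coordinates; the preprocessors below use these to fill each chunk's offset_nd. A standalone sketch of the same computation, assuming a toy 300x150 image (GNES is not imported):

from itertools import product

width, height = 300, 150
x_list = [0, width / 3, 2 * width / 3, width]      # [0, 100.0, 200.0, 300]
y_list = [0, height / 3, 2 * height / 3, height]   # [0, 50.0, 100.0, 150]

# grid coordinates and the pixel box [x1, y1, x2, y2] of every cell
index = [[x, y, x + 1, y + 1] for [x, y] in product(range(3), range(3))]
all_subareas = [[x_list[i[0]], y_list[i[1]], x_list[i[2]], y_list[i[3]]] for i in index]

print(index[0], all_subareas[0])  # [0, 0, 1, 1] [0, 0, 100.0, 50.0]
print(index[4], all_subareas[4])  # [1, 1, 2, 2] [100.0, 50.0, 200.0, 100.0]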
diff --git a/gnes/preprocessor/image/segmentation.py b/gnes/preprocessor/image/segmentation.py
index 5b9fa958..b5a6392f 100644
--- a/gnes/preprocessor/image/segmentation.py
+++ b/gnes/preprocessor/image/segmentation.py
@@ -18,6 +18,7 @@
 
 import numpy as np
 from PIL import Image
+from typing import List
 
 from .base import BaseImagePreprocessor
 from ...proto import array2blob
@@ -52,6 +53,7 @@ def apply(self, doc: 'gnes_pb2.Document'):
         super().apply(doc)
         if doc.raw_bytes:
             original_image = Image.open(io.BytesIO(doc.raw_bytes))
+            all_subareas, index = self._get_all_subarea(original_image)
             image_tensor = self._torch_transform(original_image)
             if self._use_cuda:
                 image_tensor = image_tensor.cuda()
@@ -68,6 +70,7 @@ def apply(self, doc: 'gnes_pb2.Document'):
                 c.doc_id = doc.doc_id
                 c.blob.CopyFrom(array2blob(self._crop_image_reshape(original_image, ele[0])))
                 c.offset_1d = ci
+                c.offset_nd.x.extend(self._get_seg_offset_nd(all_subareas, index, ele[0]))
                 c.weight = self._cal_area(ele[0]) / (original_image.size[0] * original_image.size[1])
 
             c = doc.chunks.add()
             c.doc_id = doc.doc_id
             c.blob.CopyFrom(array2blob(np.array(original_image.resize((self.target_img_size,
                                                                        self.target_img_size)))))
             c.offset_1d = len(chunks)
+            c.offset_nd.x.extend([100, 100])
             c.weight = 1.
         else:
             self.logger.error('bad document: "raw_bytes" is empty!')
@@ -83,6 +87,23 @@ def _crop_image_reshape(self, original_image, coordinates):
         return np.array(original_image.crop(coordinates)
                         .resize((self.target_img_size, self.target_img_size)))
 
-    @classmethod
-    def _cal_area(cls, coordinate):
+    def _get_seg_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], chunk: List[int]) -> List[int]:
+        iou_list = [self._cal_iou(area, chunk) for area in all_subareas]
+        return index[int(np.argmax(iou_list))][:2]
+
+    @staticmethod
+    def _cal_area(coordinate: List[int]):
         return (coordinate[2] - coordinate[0]) * (coordinate[3] - coordinate[1])
+
+    def _cal_iou(self, image: List[int], chunk: List[int]) -> float:
+        chunk_area = self._cal_area(chunk)
+        image_area = self._cal_area(image)
+
+        x1 = max(chunk[0], image[0])
+        y1 = max(chunk[1], image[1])
+        x2 = min(chunk[2], image[2])
+        y2 = min(chunk[3], image[3])
+
+        overlap_area = max(0, x2 - x1) * max(0, y2 - y1)
+        iou = overlap_area / (chunk_area + image_area - overlap_area)
+        return iou
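With _get_seg_offset_nd and _cal_iou, a segmented chunk's offset_nd becomes the grid coordinate of the 3x3 cell that overlaps its bounding box most, measured by IoU. A standalone worked example with made-up boxes (helper names reused only for illustration):

import numpy as np

def cal_area(c):
    return (c[2] - c[0]) * (c[3] - c[1])

def cal_iou(cell, chunk):
    x1, y1 = max(chunk[0], cell[0]), max(chunk[1], cell[1])
    x2, y2 = min(chunk[2], cell[2]), min(chunk[3], cell[3])
    overlap = max(0, x2 - x1) * max(0, y2 - y1)
    return overlap / (cal_area(chunk) + cal_area(cell) - overlap)

# 3x3 grid over a hypothetical 300x300 image, in the same order _get_all_subarea produces
cells = [[x * 100, y * 100, (x + 1) * 100, (y + 1) * 100] for x in range(3) for y in range(3)]
index = [[x, y] for x in range(3) for y in range(3)]

chunk = [50, 120, 160, 230]            # hypothetical detection box
ious = [cal_iou(cell, chunk) for cell in cells]
print(index[int(np.argmax(ious))])     # -> [1, 1], the centre cell overlaps it most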
diff --git a/gnes/preprocessor/image/sliding_window.py b/gnes/preprocessor/image/sliding_window.py
index 3a32d447..bef92f53 100644
--- a/gnes/preprocessor/image/sliding_window.py
+++ b/gnes/preprocessor/image/sliding_window.py
@@ -40,8 +40,9 @@ def __init__(self, window_size: int = 64,
     def apply(self, doc: 'gnes_pb2.Document'):
         super().apply(doc)
         if doc.raw_bytes:
-            img = np.array(Image.open(io.BytesIO(doc.raw_bytes)))
-            image_set = self._get_all_sliding_window(img)
+            original_image = Image.open(io.BytesIO(doc.raw_bytes))
+            all_subareas, index = self._get_all_subarea(original_image)
+            image_set, center_point_list = self._get_all_sliding_window(np.array(original_image))
             normalizaed_image_set = [np.array(self._torch_transform(img)).transpose(1, 2, 0)
                                      for img in image_set]
             weight = self._get_all_chunks_weight(normalizaed_image_set)
@@ -51,11 +52,12 @@ def apply(self, doc: 'gnes_pb2.Document'):
                 c.doc_id = doc.doc_id
                 c.blob.CopyFrom(array2blob(ele[0]))
                 c.offset_1d = ci
+                c.offset_nd.x.extend(self._get_slid_offset_nd(all_subareas, index, center_point_list[ci]))
                 c.weight = ele[1]
         else:
             self.logger.error('bad document: "raw_bytes" is empty!')
 
-    def _get_all_sliding_window(self, img: 'np.ndarray') -> List['np.ndarray']:
+    def _get_all_sliding_window(self, img: 'np.ndarray'):
         extend_height = self.window_size - (img.shape[0]) % self.stride_height
         extend_wide = self.window_size - (img.shape[1]) % self.stride_wide
@@ -81,9 +83,36 @@ def _get_all_sliding_window(self, img: 'np.ndarray') -> List['np.ndarray']:
             ),
             writeable=False
         )
+        center_point_list = [
+            [self.window_size / 2 + x * self.stride_wide, self.window_size / 2 + y * self.stride_height]
+            for x in range(expanded_input.shape[0])
+            for y in range(expanded_input.shape[1])]
+
         expanded_input = expanded_input.reshape((-1, self.window_size, self.window_size, 3))
         return [np.array(Image.fromarray(img).resize((self.target_img_size, self.target_img_size))) for img in
-                expanded_input]
+                expanded_input], center_point_list
+
+    def _get_slid_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], center_point: List[float]) -> List[int]:
+        location_list = self._get_location(all_subareas, center_point)
+        location = [i for i in range(len(location_list)) if location_list[i] is True][0]
+        return index[location][:2]
+
+    @staticmethod
+    def _get_location(all_subareas: List[List[int]], center_point: List[float]) -> List[bool]:
+        location_list = []
+        x_boundary = max([x[1] for x in all_subareas])
+        y_boundary = max([y[3] for y in all_subareas])
+        for area in all_subareas:
+            if center_point[0] in range(int(area[0]), int(area[2])) and center_point[1] in range(int(area[1]),
+                                                                                                 int(area[3])):
+                location_list.append(True)
+            elif center_point[0] in range(int(area[0]), int(area[2])) and y_boundary == area[3] and center_point[1] > y_boundary:
+                location_list.append(True)
+            elif center_point[1] in range(int(area[1]), int(area[3])) and x_boundary == area[2] and center_point[0] > x_boundary:
+                location_list.append(True)
+            else:
+                location_list.append(False)
+        return location_list
 
 
 class VanillaSlidingPreprocessor(BaseSlidingPreprocessor):
diff --git a/gnes/proto/__init__.py b/gnes/proto/__init__.py
index 7d4e0b9d..058768fd 100644
--- a/gnes/proto/__init__.py
+++ b/gnes/proto/__init__.py
@@ -34,6 +34,7 @@ def index(data: List[bytes], batch_size: int = 0, *args, **kwargs):
         for raw_bytes in pi:
             d = req.index.docs.add()
             d.raw_bytes = raw_bytes
+            d.weight = 1.0
         yield req
 
     @staticmethod
diff --git a/tests/yaml/base-segmentation-image-prep.yml b/tests/yaml/base-segmentation-image-prep.yml
index bd81db6f..f36c3b55 100644
--- a/tests/yaml/base-segmentation-image-prep.yml
+++ b/tests/yaml/base-segmentation-image-prep.yml
@@ -5,4 +5,5 @@ parameter:
   target_img_size: 224
   _use_cuda: false
 gnes_config:
+  name: fasterRCNN
   is_trained: true
\ No newline at end of file
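In sliding_window.py above, each window chunk records the centre point of its window, and _get_slid_offset_nd maps that point onto the same 3x3 grid to fill offset_nd. A simplified standalone sketch of that mapping (hypothetical helper; it does not reproduce the boundary handling in _get_location):

def centre_to_grid(center_point, image_size, grid=3):
    # which third of the image the point falls into, horizontally and vertically
    cell_w, cell_h = image_size[0] / grid, image_size[1] / grid
    gx = min(int(center_point[0] // cell_w), grid - 1)
    gy = min(int(center_point[1] // cell_h), grid - 1)
    return [gx, gy]

print(centre_to_grid([32.0, 32.0], (300, 300)))    # -> [0, 0]
print(centre_to_grid([160.0, 250.0], (300, 300)))  # -> [1, 2]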