Merge remote-tracking branch 'origin/improve-readme' into improve-readme

gnes-ai · Jul 24, 2019 · 216cecc · 216cecc
2 parents 45751e1 + 3ec18b4
commit 216cecc
Show file tree

Hide file tree

Showing 9 changed files with 102 additions and 22 deletions.
diff --git a/gnes/encoder/__init__.py b/gnes/encoder/__init__.py
@@ -40,6 +40,7 @@
     'HashEncoder': 'numeric.hash',
     'BasePytorchEncoder': 'image.base',
     'TFInceptionEncoder': 'image.inception',
+    'CVAEEncoder': 'image.cvae'
 }
 
 register_all_class(_cls2file_map, 'encoder')
diff --git a/gnes/encoder/image/cvae.py b/gnes/encoder/image/cvae.py
@@ -28,6 +28,7 @@ def __init__(self, model_dir: str,
                  latent_dim: int = 300,
                  batch_size: int = 64,
                  select_method: str = 'MEAN',
+                 l2_normalize: bool = False,
                  use_gpu: bool = True,
                  *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -36,24 +37,26 @@ def __init__(self, model_dir: str,
         self.latent_dim = latent_dim
         self.batch_size = batch_size
         self.select_method = select_method
+        self.l2_normalize = l2_normalize
         self.use_gpu = use_gpu
 
     def post_init(self):
         import tensorflow as tf
-        from .cave_cores.model import CVAE
+        from .cvae_cores.model import CVAE
+        g = tf.Graph()
+        with g.as_default():
+            self._model = CVAE(self.latent_dim)
+            self.inputs = tf.placeholder(tf.float32,
+                                         (None, 120, 120, 3))
 
-        self._model = CVAE(self.latent_dim)
-        self.inputs = tf.placeholder(tf.float32,
-                                     (None, 120, 120, 3))
+            self.mean, self.var = self._model.encode(self.inputs)
 
-        self.mean, self.var = self._model.encode(self.inputs)
-
-        config = tf.ConfigProto(log_device_placement=False)
-        if self.use_gpu:
-            config.gpu_options.allow_growth = True
-        self.sess = tf.Session(config=config)
-        self.saver = tf.train.Saver()
-        self.saver.restore(self.sess, self.model_dir)
+            config = tf.ConfigProto(log_device_placement=False)
+            if self.use_gpu:
+                config.gpu_options.allow_growth = True
+            self.sess = tf.Session(config=config)
+            self.saver = tf.train.Saver()
+            self.saver.restore(self.sess, self.model_dir)
 
     def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
         ret = []
@@ -68,4 +71,7 @@ def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
                 ret.append(_var)
             elif self.select_method == 'MEAN_VAR':
                 ret.append(np.concatenate([_mean, _var]), axis=1)
-        return np.concatenate(ret, axis=0).astype(np.float32)
+        v = np.concatenate(ret, axis=0).astype(np.float32)
+        if self.l2_normalize:
+            v = v / (v**2).sum(axis=1, keepdims=True)**0.5
+        return v
diff --git a/gnes/encoder/image/cvae_cores/__init__.py b/gnes/encoder/image/cvae_cores/__init__.py
diff --git a/gnes/indexer/fulltext/leveldb.py b/gnes/indexer/fulltext/leveldb.py
@@ -26,10 +26,16 @@
 
 class LVDBIndexer(BaseTextIndexer):
 
-    def __init__(self, data_path: str, keep_na_doc: bool = True, *args, **kwargs):
+    def __init__(self, data_path: str,
+                 keep_na_doc: bool = True,
+                 drop_raw_bytes: bool = False,
+                 drop_chunk_blob: bool = False,
+                 *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.data_path = data_path
         self.keep_na_doc = keep_na_doc
+        self.drop_raw_bytes = drop_raw_bytes
+        self.drop_chunk_blob = drop_chunk_blob
         self._NOT_FOUND = None
 
     def post_init(self):
@@ -40,6 +46,11 @@ def add(self, keys: List[int], docs: List['gnes_pb2.Document'], *args, **kwargs)
         with self._db.write_batch() as wb:
             for k, d in zip(keys, docs):
                 doc_id = pickle.dumps(k)
+                if self.drop_raw_bytes:
+                    d.raw_bytes = b''
+                if self.drop_chunk_blob:
+                    for i in range(len(d.chunks)):
+                        d.chunks[i].ClearField('blob')
                 doc = d.SerializeToString()
                 wb.put(doc_id, doc)
 

diff --git a/gnes/preprocessor/image/base.py b/gnes/preprocessor/image/base.py
@@ -32,8 +32,18 @@ def __init__(self,
     def _get_all_chunks_weight(self, image_set: List['np.ndarray']) -> List[float]:
         pass
 
-    @classmethod
-    def _torch_transform(cls, image):
+    @staticmethod
+    def _torch_transform(image):
         import torchvision.transforms as transforms
         return transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])(image)
+
+    @staticmethod
+    def _get_all_subarea(image):
+        from itertools import product
+        x_list = [0, image.size[0] / 3, 2 * image.size[0] / 3, image.size[0]]
+        y_list = [0, image.size[1] / 3, 2 * image.size[1] / 3, image.size[1]]
+
+        index = [[x, y, x + 1, y + 1] for [x, y] in product(range(len(x_list) - 1), range(len(y_list) - 1))]
+        all_subareas = [[x_list[idx[0]], y_list[idx[1]], x_list[idx[2]], y_list[idx[3]]] for idx in index]
+        return all_subareas, index
diff --git a/gnes/preprocessor/image/segmentation.py b/gnes/preprocessor/image/segmentation.py
@@ -18,6 +18,7 @@
 
 import numpy as np
 from PIL import Image
+from typing import List
 
 from .base import BaseImagePreprocessor
 from ...proto import array2blob
@@ -52,6 +53,7 @@ def apply(self, doc: 'gnes_pb2.Document'):
         super().apply(doc)
         if doc.raw_bytes:
             original_image = Image.open(io.BytesIO(doc.raw_bytes))
+            all_subareas, index = self._get_all_subarea(original_image)
             image_tensor = self._torch_transform(original_image)
             if self._use_cuda:
                 image_tensor = image_tensor.cuda()
@@ -68,13 +70,15 @@ def apply(self, doc: 'gnes_pb2.Document'):
                 c.doc_id = doc.doc_id
                 c.blob.CopyFrom(array2blob(self._crop_image_reshape(original_image, ele[0])))
                 c.offset_1d = ci
+                c.offset_nd.x.extend(self._get_seg_offset_nd(all_subareas, index, ele[0]))
                 c.weight = self._cal_area(ele[0]) / (original_image.size[0] * original_image.size[1])
 
             c = doc.chunks.add()
             c.doc_id = doc.doc_id
             c.blob.CopyFrom(array2blob(np.array(original_image.resize((self.target_img_size,
                                                                        self.target_img_size)))))
             c.offset_1d = len(chunks)
+            c.offset_nd.x.extend([100, 100])
             c.weight = 1.
         else:
             self.logger.error('bad document: "raw_bytes" is empty!')
@@ -83,6 +87,23 @@ def _crop_image_reshape(self, original_image, coordinates):
         return np.array(original_image.crop(coordinates).resize((self.target_img_size,
                                                                  self.target_img_size)))
 
-    @classmethod
-    def _cal_area(cls, coordinate):
+    def _get_seg_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], chunk: List[int]) -> List[int]:
+        iou_list = [self._cal_iou(area, chunk) for area in all_subareas]
+        return index[int(np.argmax(iou_list))][:2]
+
+    @staticmethod
+    def _cal_area(coordinate: List[int]):
         return (coordinate[2] - coordinate[0]) * (coordinate[3] - coordinate[1])
+
+    def _cal_iou(self, image: List[int], chunk: List[int]) -> float:
+        chunk_area = self._cal_area(chunk)
+        image_area = self._cal_area(image)
+
+        x1 = max(chunk[0], image[0])
+        y1 = max(chunk[1], image[1])
+        x2 = min(chunk[2], image[2])
+        y2 = min(chunk[3], image[3])
+
+        overlap_area = max(0, x2 - x1) * max(0, y2 - y1)
+        iou = overlap_area / (chunk_area + image_area - overlap_area)
+        return iou
diff --git a/gnes/preprocessor/image/sliding_window.py b/gnes/preprocessor/image/sliding_window.py
@@ -40,8 +40,9 @@ def __init__(self, window_size: int = 64,
     def apply(self, doc: 'gnes_pb2.Document'):
         super().apply(doc)
         if doc.raw_bytes:
-            img = np.array(Image.open(io.BytesIO(doc.raw_bytes)))
-            image_set = self._get_all_sliding_window(img)
+            original_image = Image.open(io.BytesIO(doc.raw_bytes))
+            all_subareas, index = self._get_all_subarea(original_image)
+            image_set, center_point_list = self._get_all_sliding_window(np.array(original_image))
             normalizaed_image_set = [np.array(self._torch_transform(img)).transpose(1, 2, 0)
                                      for img in image_set]
             weight = self._get_all_chunks_weight(normalizaed_image_set)
@@ -51,11 +52,12 @@ def apply(self, doc: 'gnes_pb2.Document'):
                 c.doc_id = doc.doc_id
                 c.blob.CopyFrom(array2blob(ele[0]))
                 c.offset_1d = ci
+                c.offset_nd.x.extend(self._get_slid_offset_nd(all_subareas, index, center_point_list[ci]))
                 c.weight = ele[1]
         else:
             self.logger.error('bad document: "raw_bytes" is empty!')
 
-    def _get_all_sliding_window(self, img: 'np.ndarray') -> List['np.ndarray']:
+    def _get_all_sliding_window(self, img: 'np.ndarray'):
         extend_height = self.window_size - (img.shape[0]) % self.stride_height
         extend_wide = self.window_size - (img.shape[1]) % self.stride_wide
 
@@ -81,9 +83,36 @@ def _get_all_sliding_window(self, img: 'np.ndarray') -> List['np.ndarray']:
             ),
             writeable=False
         )
+        center_point_list = [
+            [self.window_size / 2 + x * self.stride_wide, self.window_size / 2 + y * self.stride_height]
+            for x in range(expanded_input.shape[0])
+            for y in range(expanded_input.shape[1])]
+
         expanded_input = expanded_input.reshape((-1, self.window_size, self.window_size, 3))
         return [np.array(Image.fromarray(img).resize((self.target_img_size, self.target_img_size))) for img in
-                expanded_input]
+                expanded_input], center_point_list
+
+    def _get_slid_offset_nd(self, all_subareas: List[List[int]], index: List[List[int]], center_point: List[float]) -> List[int]:
+        location_list = self._get_location(all_subareas, center_point)
+        location = [i for i in range(len(location_list)) if location_list[i] is True][0]
+        return index[location][:2]
+
+    @staticmethod
+    def _get_location(all_subareas: List[List[int]], center_point: List[float]) -> List[bool]:
+        location_list = []
+        x_boundary = max([x[1] for x in all_subareas])
+        y_boundary = max([y[3] for y in all_subareas])
+        for area in all_subareas:
+            if center_point[0] in range(int(area[0]), int(area[2])) and center_point[1] in range(int(area[1]),
+                                                                                                 int(area[3])):
+                location_list.append(True)
+            elif center_point[0] in range(int(area[0]), int(area[2])) and y_boundary == area[3] and center_point[1] > y_boundary:
+                location_list.append(True)
+            elif center_point[1] in range(int(area[1]), int(area[3])) and x_boundary == area[2] and center_point[0] > x_boundary:
+                location_list.append(True)
+            else:
+                location_list.append(False)
+        return location_list
 
 
 class VanillaSlidingPreprocessor(BaseSlidingPreprocessor):

diff --git a/gnes/proto/__init__.py b/gnes/proto/__init__.py
@@ -34,6 +34,7 @@ def index(data: List[bytes], batch_size: int = 0, *args, **kwargs):
             for raw_bytes in pi:
                 d = req.index.docs.add()
                 d.raw_bytes = raw_bytes
+                d.weight = 1.0
             yield req
 
     @staticmethod

diff --git a/tests/yaml/base-segmentation-image-prep.yml b/tests/yaml/base-segmentation-image-prep.yml
@@ -5,4 +5,5 @@ parameter:
   target_img_size: 224
   _use_cuda: false
 gnes_config:
+  name: fasterRCNN
   is_trained: true