From 4e1e53fa82a60fe81eda6a5f94bff4cc155f67c8 Mon Sep 17 00:00:00 2001
From: Jem <jif0729@gmail.com>
Date: Wed, 21 Aug 2019 14:41:46 +0800
Subject: [PATCH] feat(indexer): add preprocessor and lvdb for storing gif

---
 gnes/indexer/fulltext/filesys.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/gnes/indexer/fulltext/filesys.py b/gnes/indexer/fulltext/filesys.py
index e575e4fa..c9e93f3b 100644
--- a/gnes/indexer/fulltext/filesys.py
+++ b/gnes/indexer/fulltext/filesys.py
@@ -27,9 +27,11 @@ class DirectoryIndexer(BaseTextIndexer):
 
     def __init__(self, data_path: str,
                  keep_na_doc: bool = True,
+                 file_suffix: str = 'gif',
                  *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.data_path = data_path
+        self.file_suffix = file_suffix
         self.keep_na_doc = keep_na_doc
         self._NOT_FOUND = None
 
@@ -44,15 +46,15 @@ def add(self, keys: List[int], docs: List['gnes_pb2.Document'], *args, **kwargs)
             dirs = os.path.join(self.data_path, str(k))
             if not os.path.exists(dirs):
                 os.makedirs(dirs)
-            file_type = self._get_file_type(d.doc_type)
+            self.file_suffix = self._get_file_type(d.doc_type)
             for i, chunk in enumerate(d.chunks):
-                with open(os.path.join(dirs, str(i)+file_type), 'wb') as f:
+                with open(os.path.join(dirs, str(i)+self.file_suffix), 'wb') as f:
                     f.write(chunk.raw)
 
     def query(self, keys: List[int], *args, **kwargs) -> List['gnes_pb2.Document']:
         """
         :param keys: list of doc id
-        :return: list of documents whose chunks contain all the GIFs of this doc
+        :return: list of documents whose chunks field contains all the GIFs of this doc (one GIF per chunk)
         """
         res = []
         for k in keys: