Skip to content
This repository has been archived by the owner on Feb 22, 2020. It is now read-only.

Commit

Permalink
fix(indexer): add drop raw bytes option to leveldb
Browse files — browse the repository at this point in the history
  • Loading branch information
Larryjianfeng committed Jul 23, 2019
1 parent f84b5c7 commit a4b883a
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion gnes/indexer/fulltext/leveldb.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,16 @@

class LVDBIndexer(BaseTextIndexer):

def __init__(self, data_path: str, keep_na_doc: bool = True, *args, **kwargs):
def __init__(
        self,
        data_path: str,
        keep_na_doc: bool = True,
        drop_raw_bytes: bool = False,
        drop_chunk_blob: bool = False,
        *args,
        **kwargs
):
    """Store the indexer's configuration on the instance.

    Any extra positional/keyword arguments are forwarded unchanged to the
    parent class initializer.

    :param data_path: kept as ``self.data_path`` (presumably the on-disk
        location of the LevelDB store -- confirm against ``post_init``).
    :param keep_na_doc: kept as ``self.keep_na_doc``; its effect is not
        visible in this part of the file.
    :param drop_raw_bytes: when true, ``add`` sets each document's
        ``raw_bytes`` to ``b''`` before serializing it.
    :param drop_chunk_blob: when true, ``add`` clears the ``blob`` field of
        every chunk of a document before serializing it.
    """
    super().__init__(*args, **kwargs)
    # Storage settings first, then the two size-reduction switches,
    # assigned in the same order as before.
    self.data_path, self.keep_na_doc = data_path, keep_na_doc
    self.drop_raw_bytes, self.drop_chunk_blob = drop_raw_bytes, drop_chunk_blob
    # Initialised to None here; where it is consumed is outside this view.
    self._NOT_FOUND = None

def post_init(self):
Expand All @@ -40,6 +46,11 @@ def add(self, keys: List[int], docs: List['gnes_pb2.Document'], *args, **kwargs)
with self._db.write_batch() as wb:
for k, d in zip(keys, docs):
doc_id = pickle.dumps(k)
if self.drop_raw_bytes:
d.raw_bytes = b''
if self.drop_chunk_blob:
for i in range(len(d.chunks)):
d.chunks[i].ClearField('blob')
doc = d.SerializeToString()
wb.put(doc_id, doc)

Expand Down

0 comments on commit a4b883a

Please sign in to comment.