Skip to content
This repository has been archived by the owner on Feb 22, 2020. It is now read-only.

Commit

Permalink
fix(service): fix exception when no chunks
Browse files: browse the repository at this point in the history
  • Loading branch information
hanhxiao committed Aug 30, 2019
1 parent 5b7c9f1 commit 983b899
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 10 deletions.
12 changes: 7 additions & 5 deletions gnes/service/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,19 @@ def embed_chunks_in_docs(self, docs: Union[List['gnes_pb2.Document'], 'gnes_pb2.
embeds = None

for d in docs:
if not d.chunks:
self.logger.warning('document (doc_id=%s) contains no chunks!' % d.doc_id)
continue

for c in d.chunks:
chunks.append(c)
if d.doc_type == gnes_pb2.Document.TEXT:
contents.append(c.text)
elif getattr(c, c.WhichOneof('content')) == 'blob':
contents.append(blob2array(c.blob))
else:
raise ServiceError(
'chunk content is in type: %s, dont kow how to handle that' % c.WhichOneof('content'))
else:
self.logger.warning('document (doc_id=%s) contains no chunks!' % d.doc_id)
self.logger.warning(
'chunk content is in type: %s, dont kow how to handle that, ignored' % c.WhichOneof('content'))
chunks.append(c)

if do_encoding:
embeds = self._model.encode(contents)
Expand Down
11 changes: 6 additions & 5 deletions gnes/service/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,12 @@ def _handler_chunk_index(self, msg: 'gnes_pb2.Message'):
for d in msg.request.index.docs:
if not d.chunks:
self.logger.warning('document (doc_id=%s) contains no chunks!' % d.doc_id)
else:
vecs += [blob2array(c.embedding) for c in d.chunks]
doc_ids += [d.doc_id] * len(d.chunks)
offsets += [c.offset for c in d.chunks]
weights += [c.weight for c in d.chunks]
continue

vecs += [blob2array(c.embedding) for c in d.chunks]
doc_ids += [d.doc_id] * len(d.chunks)
offsets += [c.offset for c in d.chunks]
weights += [c.weight for c in d.chunks]

if vecs:
self._model.add(list(zip(doc_ids, offsets)), np.concatenate(vecs, 0), weights)
Expand Down

0 comments on commit 983b899

Please sign in to comment.