Skip to content
This repository has been archived by the owner on Feb 22, 2020. It is now read-only.

Commit

Permalink
Merge pull request #186 from gnes-ai/fix-empty-chunk-inde
Browse files: browse the repository at this point in the history
fix(indexer): fix vec np.concat
  • Loading branch information
mergify[bot] authored Sep 2, 2019
2 parents 3524152 + a465825 commit 3a18111
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 15 deletions.
24 changes: 12 additions & 12 deletions gnes/indexer/base.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -51,7 +51,7 @@ def query(self, keys: np.ndarray, top_k: int, *args, **kwargs) -> List[List[Tupl
def query_and_score(self, q_chunks: List['gnes_pb2.Chunk'], top_k: int, *args, **kwargs) -> List[
'gnes_pb2.Response.QueryResponse.ScoredResult']:
vecs = [blob2array(c.embedding) for c in q_chunks]
queried_results = self.query(np.concatenate(vecs, 0), top_k=top_k)
queried_results = self.query(np.stack(vecs), top_k=top_k)
results = []
for q_chunk, topk_chunks in zip(q_chunks, queried_results):
for _doc_id, _offset, _weight, _relevance in topk_chunks:
Expand Down Expand Up @@ -115,16 +115,16 @@ def eq1(q_chunk: 'gnes_pb2.Chunk', d_chunk: 'gnes_pb2.Chunk',
score.explained = json.dumps({
'name': 'chunk-eq1',
'operand': [{'name': 'd_chunk_weight',
'value': d_chunk.weight,
'value': float(d_chunk.weight),
'doc_id': d_chunk.doc_id,
'offset': d_chunk.offset},
{'name': 'q_chunk_weight',
'value': q_chunk.weight,
'value': float(q_chunk.weight),
'offset': q_chunk.offset},
{'name': 'relevance',
'value': relevance}],
'value': float(relevance)}],
'op': 'prod',
'value': score.value
'value': float(score.value)
})
return score

Expand Down Expand Up @@ -152,18 +152,18 @@ def _cal_divergence(q_chunk: 'gnes_pb2.Chunk', d_chunk: 'gnes_pb2.Chunk'):
score.explained = json.dumps({
'name': 'chunk-eq2',
'operand': [{'name': 'd_chunk_weight',
'value': d_chunk.weight,
'value': float(d_chunk.weight),
'doc_id': d_chunk.doc_id,
'offset': d_chunk.offset},
{'name': 'q_chunk_weight',
'value': q_chunk.weight,
'value': float(q_chunk.weight),
'offset': q_chunk.offset},
{'name': 'relevance',
'value': relevance},
'value': float(relevance)},
{'name': 'offset_divergence',
'value': divergence}],
'value': float(divergence)}],
'op': 'prod',
'value': score.value
'value': float(score.value)
})
return score

Expand All @@ -184,10 +184,10 @@ def eq1(d: 'gnes_pb2.Document',
'name': 'doc-eq1',
'operand': [json.loads(s.explained),
{'name': 'doc_weight',
'value': d.weight,
'value': float(d.weight),
'doc_id': d.doc_id}],
'op': 'prod',
'value': s.value
'value': float(s.value)
})
return s

Expand Down
2 changes: 1 addition & 1 deletion gnes/router/base.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -107,7 +107,7 @@ def apply(self, msg: 'gnes_pb2.Message', accum_msgs: List['gnes_pb2.Message'], *
'name': 'topk-reduce',
'op': self._reduce_op,
'operand': [json.loads(vv) for vv in v['explains']],
'value': r.score.value
'value': float(r.score.value)
})
self.set_key(r, k)

Expand Down
2 changes: 1 addition & 1 deletion gnes/service/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def embed_chunks_in_docs(self, docs: Union[List['gnes_pb2.Document'], 'gnes_pb2.
'chunk content is in type: %s, dont kow how to handle that, ignored' % c.WhichOneof('content'))
chunks.append(c)

if do_encoding:
if do_encoding and contents:
embeds = self._model.encode(contents)
if len(chunks) != embeds.shape[0]:
raise ServiceError(
Expand Down
4 changes: 3 additions & 1 deletion gnes/service/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,10 @@ def _handler_chunk_index(self, msg: 'gnes_pb2.Message'):
offsets += [c.offset for c in d.chunks]
weights += [c.weight for c in d.chunks]

# self.logger.info('%d %d %d %d' % (len(vecs), len(doc_ids), len(offsets), len(weights)))
# self.logger.info(np.stack(vecs).shape)
if vecs:
self._model.add(list(zip(doc_ids, offsets)), np.concatenate(vecs, 0), weights)
self._model.add(list(zip(doc_ids, offsets)), np.stack(vecs), weights)

def _handler_doc_index(self, msg: 'gnes_pb2.Message'):
self._model.add([d.doc_id for d in msg.request.index.docs],
Expand Down

0 comments on commit 3a18111

Please sign in to comment.