From 8ca08067637af3113abf019103e43583e2b1b28c Mon Sep 17 00:00:00 2001 From: KevinHuSh Date: Thu, 9 May 2024 15:32:24 +0800 Subject: [PATCH] fix disabled doc is still retrievable (#695) ### What problem does this PR solve? Fix that disabled doc is still retrievable ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/nlp/search.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/rag/nlp/search.py b/rag/nlp/search.py index fcf2254f54c..216e9b74752 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -52,16 +52,21 @@ def _vector(self, txt, emb_mdl, sim=0.8, topk=10): def search(self, req, idxnm, emb_mdl=None): qst = req.get("question", "") bqry, keywords = self.qryr.question(qst) - if req.get("kb_ids"): - bqry.filter.append(Q("terms", kb_id=req["kb_ids"])) - if req.get("doc_ids"): - bqry.filter.append(Q("terms", doc_id=req["doc_ids"])) - if "available_int" in req: - if req["available_int"] == 0: - bqry.filter.append(Q("range", available_int={"lt": 1})) - else: - bqry.filter.append( - Q("bool", must_not=Q("range", available_int={"lt": 1}))) + def add_filters(bqry): + nonlocal req + if req.get("kb_ids"): + bqry.filter.append(Q("terms", kb_id=req["kb_ids"])) + if req.get("doc_ids"): + bqry.filter.append(Q("terms", doc_id=req["doc_ids"])) + if "available_int" in req: + if req["available_int"] == 0: + bqry.filter.append(Q("range", available_int={"lt": 1})) + else: + bqry.filter.append( + Q("bool", must_not=Q("range", available_int={"lt": 1}))) + return bqry + + bqry = add_filters(bqry) bqry.boost = 0.05 s = Search() @@ -117,8 +122,7 @@ def search(self, req, idxnm, emb_mdl=None): es_logger.info("TOTAL: {}".format(self.es.getTotal(res))) if self.es.getTotal(res) == 0 and "knn" in s: bqry, _ = self.qryr.question(qst, min_match="10%") - if req.get("kb_ids"): - bqry.filter.append(Q("terms", kb_id=req["kb_ids"])) + bqry = add_filters(bqry) s["query"] = bqry.to_dict() s["knn"]["filter"] = 
bqry.to_dict() s["knn"]["similarity"] = 0.17