From 74d424b38589fe5038518229a17ca32f2dd97867 Mon Sep 17 00:00:00 2001 From: Nicolay Rusnachenko Date: Thu, 14 Jul 2022 14:58:22 +0300 Subject: [PATCH] #42 fixed --- arelight/pipelines/inference_bert.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arelight/pipelines/inference_bert.py b/arelight/pipelines/inference_bert.py index b6a9d33..7afa17f 100644 --- a/arelight/pipelines/inference_bert.py +++ b/arelight/pipelines/inference_bert.py @@ -48,12 +48,21 @@ def apply_core(self, input_data, pipeline_ctx): def __iter_predict_result(): samples = BaseRowsStorage.from_tsv(samples_filepath) + used_row_ids = set() + data = {"text_a": [], "text_b": [], "row_ids": []} for row_ind, row in samples: + + # Considering unique rows only. + if row["id"] in used_row_ids: + continue + data["text_a"].append(row['text_a']) data["text_b"].append(row['text_b']) data["row_ids"].append(row_ind) + + used_row_ids.add(row["id"]) batch_size = 10