# eval_mteb_results.py
import json

import numpy as np
import torch.nn as nn
from sentence_transformers import CrossEncoder


# Load data from a JSON file
def load_json_file(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)


# Save data to a JSON file (np.float32 model scores are not JSON serializable
# by default, so convert them to plain floats on the way out)
def save_json_file(data, file_path):
    def convert_float32(obj):
        if isinstance(obj, np.float32):
            return float(obj)
        raise TypeError(f"Object of type {type(obj)} is not JSON serializable")

    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4, ensure_ascii=False, default=convert_float32)
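

# Expected input shapes, inferred from how the files are used below (the exact
# schemas are an assumption, not documented alongside this script):
#   corpus.json        -> {"doc_id": "document text", ...}
#   queries.json       -> {"query_id": "query text", ...}
#   relevant_docs.json -> {"query_id": ["doc_id", ...], ...}
#   results.json       -> {"query_id": {"doc_id": retrieval_score, ...}, ...}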


# Main processing function
def process_files(
    corpus_file, queries_file, relevant_docs_file, results_file, output_file
):
    # Load input data
    corpus = load_json_file(corpus_file)
    queries = load_json_file(queries_file)
    relevant_docs = load_json_file(relevant_docs_file)
    results = load_json_file(results_file)
    missing_top = 5  # report relevant docs that fall outside the top 5 reranked hits

    # Initialize the cross-encoder; DataParallel splits each prediction batch
    # across all visible GPUs
    model = CrossEncoder("cross-encoder/msmarco-MiniLM-L6-en-de-v1", max_length=512)
    model.model = nn.DataParallel(model.model)

    processed_results = {}
    output_results = {}

    for query_id, relevant in relevant_docs.items():
        results_query = results.get(query_id, {})
        query_text = queries.get(query_id, "")
        if not query_text:
            raise KeyError(f"Query ID {query_id} not found in queries.json.")

        # Every retrieved document must exist in the corpus
        for doc_id in results_query:
            if doc_id not in corpus:
                raise KeyError(f"Document ID {doc_id} not found in the corpus.")

        # Score (query, document) pairs with the cross-encoder; predict returns
        # one relevance score per pair, in input order
        pairs = [(query_text, corpus[doc_id]) for doc_id in results_query]
        scores = model.predict(
            pairs, batch_size=600, show_progress_bar=True, convert_to_numpy=True
        )

        # Collect scores and sort documents by cross-encoder score, descending
        scored_results = dict(zip(results_query, scores))
        sorted_results = dict(
            sorted(scored_results.items(), key=lambda item: item[1], reverse=True)
        )

        # Relevant documents missing from the top `missing_top` reranked results
        top_docs = list(sorted_results.keys())[:missing_top]
        missing_relevant_docs = [
            doc_id for doc_id in relevant if doc_id not in top_docs
        ]
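        # Example: with missing_top = 5, a relevant document that the
        # cross-encoder reranks to position 7 is reported as missing.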

        # Compare the cross-encoder ranking against the original retrieval ranking
        original_ranking = [
            doc_id
            for doc_id, _ in sorted(
                results_query.items(), key=lambda item: item[1], reverse=True
            )
        ]
        cross_encoder_ranking = list(sorted_results.keys())

        comparison_results = {}
        for doc_id, cross_encoder_score in sorted_results.items():
            original_score = results_query.get(doc_id)
            if original_score is None:
                raise KeyError(
                    f"Document ID {doc_id} not found in results for query ID {query_id}."
                )
            comparison_results[doc_id] = {
                "cross_encoder_score": cross_encoder_score,
                "original_score": original_score,
                "cross_encoder_index": cross_encoder_ranking.index(doc_id),
                "original_index": original_ranking.index(doc_id),
                "mismatch": False,
            }

        # Flag documents whose position differs between the two rankings
        # (both rankings cover the same documents, so indexing is safe)
        for i, doc_id in enumerate(original_ranking):
            if doc_id != cross_encoder_ranking[i]:
                comparison_results[doc_id]["mismatch"] = True
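        # Example: original ranking [d1, d2, d3] vs. cross-encoder ranking
        # [d2, d1, d3] flags d1 and d2 as mismatches, but not d3.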

        # Add data to output
        cross_encoder_scores = {
            doc_id: entry["cross_encoder_score"]
            for doc_id, entry in comparison_results.items()
        }
        processed_results[query_id] = {
            "comparison_results": comparison_results,
            "missing_relevant_docs_in_top_x": missing_relevant_docs,
            "cross_encoder_results": cross_encoder_scores,
        }
        output_results[query_id] = cross_encoder_scores

    # Print the processed results, highlighting rank mismatches in red
    for query_id, result in processed_results.items():
        print(f"Query ID: {query_id}")
        print("Comparison Results:")
        for doc_id, scores in result["comparison_results"].items():
            line = (
                f"  Doc ID: {doc_id}, "
                f"Cross-Encoder Score: {scores['cross_encoder_score']} "
                f"(Index: {scores['cross_encoder_index']}), "
                f"MTEB Score: {scores['original_score']} "
                f"(Index: {scores['original_index']})"
            )
            print(f"\033[91m{line}\033[0m" if scores["mismatch"] else line)
        print(f"Missing Relevant Docs in Top {missing_top}:")
        for doc_id in result["missing_relevant_docs_in_top_x"]:
            print(f"  Doc ID: {doc_id}")
        print()

    # Save the cross-encoder scores and the full comparison report
    save_json_file(output_results, output_file)
    save_json_file(processed_results, output_file.replace(".json", "_processed.json"))
    print(f"Processed results saved to {output_file}")


if __name__ == "__main__":
    # File paths
    corpus_file = "output/fmplus/corpus.json"
    queries_file = "output/fmplus/queries.json"
    relevant_docs_file = "output/fmplus/relevant_docs.json"
    results_file = "output/GermanDPR_default_predictions.json"
    output_file = "output/GermanDPR_default_predictions_results.json"

    # Run the script
    process_files(
        corpus_file, queries_file, relevant_docs_file, results_file, output_file
    )
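
# Minimal smoke test (a sketch; the toy file names and contents below are
# hypothetical, not part of the original pipeline):
#
#   save_json_file({"d1": "Berlin is the capital of Germany.",
#                   "d2": "Munich hosts the Oktoberfest."}, "toy_corpus.json")
#   save_json_file({"q1": "What is the capital of Germany?"}, "toy_queries.json")
#   save_json_file({"q1": ["d1"]}, "toy_relevant_docs.json")
#   save_json_file({"q1": {"d1": 0.9, "d2": 0.4}}, "toy_results.json")
#   process_files("toy_corpus.json", "toy_queries.json",
#                 "toy_relevant_docs.json", "toy_results.json", "toy_output.json")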