Skip to content

Commit

Permalink
Change 'sub_em' to 'acc'
Browse files Browse the repository at this point in the history
  • Loading branch information
ignorejjj committed Jul 4, 2024
1 parent b439c45 commit 15de563
Show file tree
Hide file tree
Showing 7 changed files with 10 additions and 10 deletions.
4 changes: 2 additions & 2 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ use_fid: False # whether to use FID, only valid in encoder-decoder model

# -------------------------------------------------Evaluation Settings------------------------------------------------#
# Metrics to evaluate the result
metrics: ['em','f1','sub_em','precision','recall']
metrics: ['em','f1','acc','precision','recall']
# Specify setting for metric, will be called within certain metrics
metric_setting:
retrieval_recall_topk: 5
Expand Down Expand Up @@ -301,7 +301,7 @@ This section sets various settings used during evaluation. If you use a custom e

```yaml
# Metrics to evaluate the result
metrics: ['em','f1','sub_em','precision','recall']
metrics: ['em','f1','acc','precision','recall']
# Specify setting for metric, will be called within certain metrics
metric_setting:
retrieval_recall_topk: 5
Expand Down
4 changes: 2 additions & 2 deletions docs/introduction_for_beginners_en.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ config_dict = {
'model2path': {'e5': <retriever_path>, 'llama2-7B-chat': <generator_path>},
'generator_model': 'llama2-7B-chat',
'retrieval_method': 'e5',
'metrics': ['em', 'f1', 'sub_em'],
'metrics': ['em', 'f1', 'acc'],
'retrieval_topk': 1,
'save_intermediate_data': True
}
Expand Down Expand Up @@ -142,7 +142,7 @@ config_dict = {
'model2path': {'e5': <retriever_path>, 'llama2-7B-chat': <generator_path>},
'generator_model': 'llama2-7B-chat',
'retrieval_method': 'e5',
'metrics': ['em','f1','sub_em'],
'metrics': ['em','f1','acc'],
'retrieval_topk': 1,
'save_intermediate_data': True
}
Expand Down
2 changes: 1 addition & 1 deletion docs/introduction_for_beginners_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ config_dict = {
'model2path': {'e5': <retriever_path>, 'llama2-7B-chat': <generator_path>},
'generator_model': 'llama2-7B-chat',
'retrieval_method': 'e5',
'metrics': ['em', 'f1', 'sub_em'],
'metrics': ['em', 'f1', 'acc'],
'retrieval_topk': 1,
'save_intermediate_data': True
}
Expand Down
2 changes: 1 addition & 1 deletion examples/methods/my_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ sc_config:

# -------------------------------------------------Evaluation Settings------------------------------------------------#
# Metrics to evaluate the result
metrics: [ 'em','f1','sub_em','precision','recall']
metrics: [ 'em','f1','acc','precision','recall']
# Specify setting for metric, will be called within certain metrics
metric_setting:
retrieval_recall_topk: 5
Expand Down
2 changes: 1 addition & 1 deletion examples/quick_start/simple_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
'model2path': {'e5': args.retriever_path, 'llama3-8B-instruct': args.model_path},
'generator_model': 'llama3-8B-instruct',
'retrieval_method': 'e5',
'metrics': ['em','f1','sub_em'],
'metrics': ['em','f1','acc'],
'retrieval_topk': 1,
'save_intermediate_data': True
}
Expand Down
2 changes: 1 addition & 1 deletion flashrag/config/basic_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ gpu_memory_utilization: 0.85 # ratio of gpu's memory usage for generator

# -------------------------------------------------Evaluation Settings------------------------------------------------#
# Metrics to evaluate the result
metrics: ['em','f1','sub_em','precision','recall','input_tokens']
metrics: ['em','f1','acc','precision','recall','input_tokens']
# Specify setting for metric, will be called within certain metrics
metric_setting:
retrieval_recall_topk: 5
Expand Down
4 changes: 2 additions & 2 deletions flashrag/evaluator/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def calculate_metric(self, data):
class Sub_ExactMatch(BaseMetric):
r"""Sub-Exact match measure whether the predicted answer contains the standard answer.
"""
metric_name = "sub_em"
metric_name = "acc"

def __init__(self, config):
super().__init__(config)
Expand Down Expand Up @@ -172,7 +172,7 @@ def calculate_metric(self, data):
metric_score_list = [self.calculate_sub_em(pred, golden_answers) for pred, golden_answers in zip(pred_list, golden_answers_list)]
sub_em_score = sum(metric_score_list) / len(metric_score_list)

return {"sub_em": sub_em_score}, metric_score_list
return {"acc": sub_em_score}, metric_score_list

class Retrieval_Recall(BaseMetric):
r"""The recall of the top-k retrieved passages, we measure if any of the passage contain the answer string. """
Expand Down

0 comments on commit 15de563

Please sign in to comment.