-
Notifications
You must be signed in to change notification settings - Fork 485
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Update with PMMEval * Update * Update __init__.py * Fix Bugs * Delete .pre-commit-config.yaml * Pull merge --------- Co-authored-by: liushz <[email protected]>
- Loading branch information
1 parent
f7dbe6b
commit 90efcf2
Showing
38 changed files
with
2,200 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from mmengine.config import read_base | ||
|
||
from opencompass.models import HuggingFacewithChatTemplate | ||
|
||
|
||
# Model, dataset and summarizer definitions are imported inside mmengine's
# `read_base()` context so they become part of this evaluation config.
with read_base():
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import models

    # Per-task P-MMEval dataset lists, currently disabled. Un-comment one of
    # these together with the matching `datasets = ...` line further down to
    # evaluate a single task.
    # NOTE(review): every module below ends in `_gen` except `xnli`; confirm
    # that module name before re-enabling it.
    # from opencompass.configs.datasets.PMMEval.flores_gen import PMMEval_flores_datasets
    # from opencompass.configs.datasets.PMMEval.humanevalxl_gen import PMMEval_HumanEvalXL_datasets
    # from opencompass.configs.datasets.PMMEval.mgsm_gen import PMMEval_MGSM_datasets
    # from opencompass.configs.datasets.PMMEval.mhellaswag_gen import PMMEval_MHellaswag_datasets
    # from opencompass.configs.datasets.PMMEval.mifeval_gen import PMMEval_MIFEval_datasets
    # from opencompass.configs.datasets.PMMEval.mlogiqa_gen import PMMEval_MLogiQA_datasets
    # from opencompass.configs.datasets.PMMEval.mmmlu_gen import PMMEval_MMMLU_datasets
    # from opencompass.configs.datasets.PMMEval.xnli import PMMEval_XNLI_datasets

    # Dataset list actually used by this config (presumably the combination of
    # all P-MMEval tasks; verify against `pmmeval_gen`).
    from opencompass.configs.datasets.PMMEval.pmmeval_gen import PMMEval_datasets

    from opencompass.configs.summarizers.PMMEval import summarizer


# Single-task alternatives matching the commented-out imports above.
# datasets = PMMEval_flores_datasets
# datasets = PMMEval_HumanEvalXL_datasets
# datasets = PMMEval_MGSM_datasets
# datasets = PMMEval_MHellaswag_datasets
# datasets = PMMEval_MIFEval_datasets
# datasets = PMMEval_MLogiQA_datasets
# datasets = PMMEval_MMMLU_datasets
# datasets = PMMEval_XNLI_datasets

# The dataset selection that is in effect.
datasets = PMMEval_datasets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .flores_gen_2697d7 import PMMEval_flores_datasets |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets.PMMEval import PMMEvalFloresDataset, PMMEvalFloresEvaluator, pmmeval_flores_postprocess | ||
|
||
# Target languages of the translation task. Every prompt below asks for a
# translation from English into one of these languages.
NATURAL_LANGUAGE_FULLNAMES_FLORES = [
    'Chinese',
    'Arabic',
    'Spanish',
    'French',
    'Japanese',
    'Korean',
    'Portuguese',
    'Thai',
    'Vietnamese',
]

# One instruction per target language, written in that language. `{src}` is
# the placeholder for the reader's `src` input column.
PROMPT = {
    'Chinese': '将这个句子从英语翻译成中文。\n\n{src}',
    'Arabic': 'ترجم هذه الجملة من الإنجليزية إلى العربية.\n\n{src}',
    'Spanish': 'Traduce esta oración del inglés al español.\n\n{src}',
    'Japanese': 'この文を英語から日本語に翻訳してください。\n\n{src}',
    'Korean': '이 문장을 영어에서 한국어로 번역하세요.\n\n{src}',
    'Thai': 'แปลประโยคนี้จากภาษาอังกฤษเป็นภาษาไทย.\n\n{src}',
    'French': "Traduisez cette phrase de l'anglais en français.\n\n{src}",
    'Portuguese': 'Traduza esta frase do inglês para o português.\n\n{src}',
    'Vietnamese': 'Dịch câu này từ tiếng Anh sang tiếng Việt.\n\n{src}',
}

# Filled by the loop below with one dataset config per target language.
PMMEval_flores_datasets = []

# `src` feeds the prompt; `tgt` is the reference column.
PMMEval_flores_reader_cfg = {
    'input_columns': ['src'],
    'output_column': 'tgt',
    'test_split': 'test',
}

for lang_fullname in NATURAL_LANGUAGE_FULLNAMES_FLORES:
    # Single HUMAN turn carrying the language-specific instruction.
    PMMEval_flores_infer_cfg = {
        'prompt_template': {
            'type': PromptTemplate,
            'template': {
                'round': [
                    {'role': 'HUMAN', 'prompt': PROMPT[lang_fullname]},
                ],
            },
        },
        'retriever': {'type': ZeroRetriever},
        'inferencer': {'type': GenInferencer},
    }

    # The post-processor receives the target language so it can be applied
    # per language.
    PMMEval_flores_eval_cfg = {
        'evaluator': {'type': PMMEvalFloresEvaluator},
        'pred_role': 'BOT',
        'pred_postprocessor': {
            'type': pmmeval_flores_postprocess,
            'lang_fullname': lang_fullname,
        },
    }

    PMMEval_flores_datasets.append({
        'abbr': f'flores-{lang_fullname}',
        'type': PMMEvalFloresDataset,
        'path': 'P-MMEval',
        'lang_fullname': lang_fullname,
        'reader_cfg': PMMEval_flores_reader_cfg,
        'infer_cfg': PMMEval_flores_infer_cfg,
        'eval_cfg': PMMEval_flores_eval_cfg,
    })
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base():
    # Re-export the dataset list from the hashed config module that sits next
    # to this file. The suffix has to match that module's file name
    # (humanevalxl_gen_bdec92.py); the previous `_4dfef4` suffix did not
    # correspond to any HumanEvalXL module added alongside this file.
    from .humanevalxl_gen_bdec92 import PMMEval_HumanEvalXL_datasets
49 changes: 49 additions & 0 deletions
49
opencompass/configs/datasets/PMMEval/humanevalxl_gen_bdec92.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets.PMMEval import PMMEvalHumanEvalXLDataset, PMMEvalHumanEvalXLEvaluator | ||
|
||
# Natural languages in which the task descriptions are provided.
NATURAL_LANGUAGE_FULLNAMES = [
    'English',
    'Chinese',
    'Arabic',
    'Spanish',
    'French',
    'Japanese',
    'Korean',
    'Portuguese',
    'Thai',
    'Vietnamese',
]

# Filled below with one config per (natural language, programming language).
PMMEval_HumanEvalXL_datasets = []

PMMEval_HumanEvalXL_reader_cfg = {
    'input_columns': [
        'task_id',
        'prompt',
        'entry_point',
        'test',
        'language',
        'description',
        'natural_language',
    ],
    'output_column': 'declaration',
    'test_split': 'test',
}

# The sample's `prompt` column is passed to the model unchanged.
PMMEval_HumanEvalXL_infer_cfg = {
    'prompt_template': {'type': PromptTemplate, 'template': '{prompt}'},
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

for lang_fullname in NATURAL_LANGUAGE_FULLNAMES:
    for program_lang in ('python', 'java', 'javascript'):
        # NOTE(review): the evaluator endpoint is hard-coded to
        # localhost:5001, presumably a code-execution service; confirm it is
        # reachable where the evaluation runs.
        PMMEval_HumanEvalXL_eval_cfg = {
            'evaluator': {
                'type': PMMEvalHumanEvalXLEvaluator,
                'language': program_lang,
                'text_language': lang_fullname,
                'ip_address': 'localhost',
                'port': 5001,
            },
            'pred_role': 'BOT',
        }

        PMMEval_HumanEvalXL_datasets.append({
            'abbr': f'humanevalxl-{program_lang}-{lang_fullname}',
            'type': PMMEvalHumanEvalXLDataset,
            'path': 'P-MMEval',
            'lang': lang_fullname,
            'program_lang': program_lang,
            'reader_cfg': PMMEval_HumanEvalXL_reader_cfg,
            'infer_cfg': PMMEval_HumanEvalXL_infer_cfg,
            'eval_cfg': PMMEval_HumanEvalXL_eval_cfg,
        })
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .mgsm_gen_679720 import PMMEval_MGSM_datasets |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets.PMMEval import PMMEvalMGSMDataset, PMMEvalMGSMEvaluator | ||
|
||
# Language codes for which an MGSM dataset entry is generated.
NATURAL_LANGUAGE_CODES = 'en zh ar es fr ja ko pt th vi'.split()

# Localised answer prefix the model is told to end its response with. The
# trailing space is part of the required format.
_MGSM_ANSWER_PREFIXES = {
    'en': 'The answer is ',
    'es': 'La respuesta es ',
    'fr': 'La réponse est ',
    'zh': '答案是 ',
    'ja': '答えは ',
    'th': 'คำตอบคือ ',
    'ko': '답변은 ',
    'pt': 'A resposta é ',
    'vi': 'Câu trả lời là ',
    'ar': 'الجواب هو ',
}

# The instruction is identical for every language apart from the answer
# prefix. `<PREFIX>` is substituted here; `{question}` is left in place as the
# prompt-template placeholder.
_MGSM_INSTRUCTION_SKELETON = (
    'Solve this math problem. Give the reasoning steps before giving the '
    'final answer on the last line by itself in the format of "<PREFIX>". '
    'Do not add anything other than the integer answer after "<PREFIX>".'
    '\n\n{question}'
)

# Maps a language code to its full instruction string.
LANG_TO_INSTRUCTIONS = {
    code: _MGSM_INSTRUCTION_SKELETON.replace('<PREFIX>', prefix)
    for code, prefix in _MGSM_ANSWER_PREFIXES.items()
}
|
||
# Filled by the loop below with one MGSM dataset config per language code.
PMMEval_MGSM_datasets = []

# The reader and evaluator settings do not depend on the language, so one
# instance of each is shared by every dataset entry.
PMMEval_MGSM_reader_cfg = {
    'input_columns': ['question'],
    'output_column': 'answer',
    'test_split': 'test',
}

PMMEval_MGSM_eval_cfg = {
    'evaluator': {'type': PMMEvalMGSMEvaluator},
    'pred_role': 'BOT',
}

for lang_code in NATURAL_LANGUAGE_CODES:
    # Only the prompt differs between languages.
    PMMEval_MGSM_infer_cfg = {
        'prompt_template': {
            'type': PromptTemplate,
            'template': {
                'round': [
                    {
                        'role': 'HUMAN',
                        'prompt': LANG_TO_INSTRUCTIONS[lang_code],
                    },
                ],
            },
        },
        'retriever': {'type': ZeroRetriever},
        'inferencer': {'type': GenInferencer},
    }

    PMMEval_MGSM_datasets.append({
        'abbr': f'mgsm-{lang_code}',
        'type': PMMEvalMGSMDataset,
        'path': 'P-MMEval',
        'lang': lang_code,
        'reader_cfg': PMMEval_MGSM_reader_cfg,
        'infer_cfg': PMMEval_MGSM_infer_cfg,
        'eval_cfg': PMMEval_MGSM_eval_cfg,
    })
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .mhellaswag_gen_1a6b73 import PMMEval_MHellaswag_datasets |
54 changes: 54 additions & 0 deletions
54
opencompass/configs/datasets/PMMEval/mhellaswag_gen_1a6b73.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets.PMMEval import PMMEvalMHellaswagDataset, PMMEvalMHellaswagEvaluator, pmmeval_mhellaswag_postprocess | ||
|
||
# Language codes for which an MHellaswag dataset entry is generated.
NATURAL_LANGUAGE_CODES = 'en zh ar es fr ja ko pt th vi'.split()

# Four-option multiple-choice prompt. The model is asked to answer with a
# small JSON object whose "answer" value is the chosen letter.
PMMEVAL_MHELLASWAG_TEMPLATE = (
    'Input: {ctx}\n'
    'Options: \n'
    'A. {option_1}\n'
    'B. {option_2}\n'
    'C. {option_3}\n'
    'D. {option_4}\n'
    'Pick the correct ending for the sentence from A, B, C, and D, '
    'and return it in the following JSON format:\n'
    '{"answer": "[choice]"}\n'
    'where [choice] must be one of A, B, C or D.'
)
|
||
# Filled by the loop below with one MHellaswag dataset config per language.
PMMEval_MHellaswag_datasets = []

# The context and the four candidate endings feed the prompt; `label` is the
# reference column.
PMMEval_MHellaswag_reader_cfg = {
    'input_columns': ['ctx', 'option_1', 'option_2', 'option_3', 'option_4'],
    'output_column': 'label',
    'test_split': 'test',
}

# The same prompt is used for every language, so this is built once.
PMMEval_MHellaswag_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': PMMEVAL_MHELLASWAG_TEMPLATE},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

for lang_code in NATURAL_LANGUAGE_CODES:
    # The post-processor is configured with the language code, so the
    # evaluation config has to be rebuilt for each language.
    PMMEval_MHellaswag_eval_cfg = {
        'evaluator': {'type': PMMEvalMHellaswagEvaluator},
        'pred_role': 'BOT',
        'pred_postprocessor': {
            'type': pmmeval_mhellaswag_postprocess,
            'lang_code': lang_code,
        },
    }

    PMMEval_MHellaswag_datasets.append({
        'abbr': f'mhellaswag-{lang_code}',
        'type': PMMEvalMHellaswagDataset,
        'path': 'P-MMEval',
        'lang': lang_code,
        'reader_cfg': PMMEval_MHellaswag_reader_cfg,
        'infer_cfg': PMMEval_MHellaswag_infer_cfg,
        'eval_cfg': PMMEval_MHellaswag_eval_cfg,
    })
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .mifeval_gen_79f8fb import PMMEval_MIFEval_datasets |
51 changes: 51 additions & 0 deletions
51
opencompass/configs/datasets/PMMEval/mifeval_gen_79f8fb.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets.PMMEval import PMMEvalMIFEvalDataset, PMMEvalMIFEvalEvaluator, pmmeval_mifeval_postprocess | ||
|
||
# Language codes for which an MIFEval dataset entry is generated.
NATURAL_LANGUAGE_CODES = 'en zh ar es fr ja ko pt th vi'.split()

# The sample's `prompt` column is sent to the model as-is.
PMMEVAL_MIFEVAL_TEMPLATE = '{prompt}'

# Filled by the loop below with one dataset config per language.
PMMEval_MIFEval_datasets = []

# No reference column is configured (`output_column` is None).
# `instruction_id_list` and `kwargs` are read alongside the prompt,
# presumably for the evaluator's instruction checks (verify against
# PMMEvalMIFEvalEvaluator).
PMMEval_MIFEval_reader_cfg = {
    'input_columns': ['prompt', 'instruction_id_list', 'kwargs'],
    'output_column': None,
    'test_split': 'test',
}

PMMEval_MIFEval_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': PMMEVAL_MIFEVAL_TEMPLATE},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

for lang_code in NATURAL_LANGUAGE_CODES:
    # Rebuilt per language because the post-processor takes the language code.
    PMMEval_MIFEval_eval_cfg = {
        'evaluator': {'type': PMMEvalMIFEvalEvaluator},
        'pred_role': 'BOT',
        'pred_postprocessor': {
            'type': pmmeval_mifeval_postprocess,
            'lang_code': lang_code,
        },
    }

    PMMEval_MIFEval_datasets.append({
        'abbr': f'mifeval-{lang_code}',
        'type': PMMEvalMIFEvalDataset,
        'path': 'P-MMEval',
        'lang': lang_code,
        'reader_cfg': PMMEval_MIFEval_reader_cfg,
        'infer_cfg': PMMEval_MIFEval_infer_cfg,
        'eval_cfg': PMMEval_MIFEval_eval_cfg,
    })
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .mlogiqa_gen_36c4f9 import PMMEval_MLogiQA_datasets |
50 changes: 50 additions & 0 deletions
50
opencompass/configs/datasets/PMMEval/mlogiqa_gen_36c4f9.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets.PMMEval import PMMEvalMLogiQADataset, PMMEvalMLogiQAEvaluator, pmmeval_mlogiqa_postprocess | ||
|
||
# Language codes for which an MLogiQA dataset entry is generated.
NATURAL_LANGUAGE_CODES = 'en zh ar es fr ja ko pt th vi'.split()

# Passage + question + four options. The model is asked to reply with a
# dict-like snippet whose 'answer' value is the chosen letter.
PMMEVAL_MLOGIQA_TEMPLATE = (
    "Passage: {context}\n"
    "Question: {question}\n"
    "Choices:\n"
    "A.{option_1}\n"
    "B.{option_2}\n"
    "C.{option_3}\n"
    "D.{option_4}\n"
    "Please choose the most suitable one among A, B, C and D as the answer "
    "to this question, and return it in the following JSON format:\n"
    "{'answer': '[choice]'}\n"
    "where [choice] must be one of A, B, C and D."
)
|
||
# Filled by the loop below with one MLogiQA dataset config per language.
PMMEval_MLogiQA_datasets = []

# NOTE(review): this reader sets `train_split='test'`, whereas the sibling
# P-MMEval configs pass `test_split='test'`. Kept as-is; confirm which split
# mapping the dataset loader expects.
PMMEval_MLogiQA_reader_cfg = {
    'input_columns': [
        'context',
        'question',
        'option_1',
        'option_2',
        'option_3',
        'option_4',
    ],
    'output_column': 'answer',
    'train_split': 'test',
}

# The same prompt is used for every language, so this is built once.
PMMEval_MLogiQA_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': PMMEVAL_MLOGIQA_TEMPLATE},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

for lang_code in NATURAL_LANGUAGE_CODES:
    # Rebuilt per language because the post-processor takes the language code.
    PMMEval_MLogiQA_eval_cfg = {
        'evaluator': {'type': PMMEvalMLogiQAEvaluator},
        'pred_role': 'BOT',
        'pred_postprocessor': {
            'type': pmmeval_mlogiqa_postprocess,
            'lang_code': lang_code,
        },
    }

    PMMEval_MLogiQA_datasets.append({
        'abbr': f'mlogiqa-{lang_code}',
        'type': PMMEvalMLogiQADataset,
        'path': 'P-MMEval',
        'lang': lang_code,
        'reader_cfg': PMMEval_MLogiQA_reader_cfg,
        'infer_cfg': PMMEval_MLogiQA_infer_cfg,
        'eval_cfg': PMMEval_MLogiQA_eval_cfg,
    })
Oops, something went wrong.