Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Update] Update configurations #1704

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion configs/datasets/livecodebench/livecodebench_gen.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from mmengine.config import read_base

with read_base():
    # Re-export the current LiveCodeBench dataset list. This variant
    # supersedes the earlier ``livecodebench_gen_b2b0fd`` config, whose
    # import would be shadowed by this one anyway.
    from .livecodebench_gen_6966bc import LCB_datasets  # noqa: F401, F403
164 changes: 164 additions & 0 deletions configs/datasets/livecodebench/livecodebench_gen_6966bc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import (
LCBCodeGenerationDataset,
LCBCodeExecutionDataset,
LCBTestOutputPredictionDataset,
LCBCodeGenerationEvaluator,
LCBCodeExecutionEvaluator,
LCBTestOutputEvaluator
)
from opencompass.datasets.livecodebench import TestOutputPromptConstants


# Columns read for the code-generation task.
lcb_code_generation_reader_cfg = {
    'input_columns': ['question_content', 'format_prompt'],
    # Alternative kept for reference: output_column='evaluation_sample'
    'output_column': 'question_id',
}

# Generic system message for code generation. It is a plain string (no
# placeholders), so no f-string prefix is needed. Not referenced by the
# configs visible in this file.
SYSTEM_MESSAGE_GENERIC = (
    'You are an expert Python programmer. '
    'You will be given a question (problem specification) and will generate '
    'a correct Python program that matches the specification and passes all '
    'tests. You will NOT return anything except for the program.'
)

# HUMAN-turn template for code generation. ``{question_content}`` and
# ``{format_prompt}`` are left as literal placeholders here; they match the
# input columns declared in ``lcb_code_generation_reader_cfg``.
prompt_template = (
    '### Question:\n{question_content}\n\n{format_prompt}'
    '### Answer: (use the provided format with backticks)\n\n'
)


# Code Generation Tasks
# Inference config: one HUMAN turn whose text is ``prompt_template``,
# paired with ZeroRetriever and GenInferencer (max_out_len=1024).
lcb_code_generation_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': prompt_template},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 1024},
}

# Evaluation config for code generation: LCBCodeGenerationEvaluator with
# num_process_evaluate=4 and timeout=6 (units are defined by the evaluator);
# predictions are taken from the BOT role.
lcb_code_generation_eval_cfg = {
    'evaluator': {
        'type': LCBCodeGenerationEvaluator,
        'num_process_evaluate': 4,
        'timeout': 6,
    },
    'pred_role': 'BOT',
}

# Dataset entry that ties the reader, inference and evaluation configs
# together for the code-generation task.
LCBCodeGeneration_dataset = {
    'type': LCBCodeGenerationDataset,
    'abbr': 'lcb_code_generation',
    'path': 'opencompass/code_generation_lite',
    'reader_cfg': lcb_code_generation_reader_cfg,
    'infer_cfg': lcb_code_generation_infer_cfg,
    'eval_cfg': lcb_code_generation_eval_cfg,
}

# Code Execution Dataset
# Columns read for the code-execution task.
lcb_code_execution_reader_cfg = {
    'input_columns': ['prompt'],
    'output_column': 'evaluation_sample',
}

# Inference config for code execution: a SYSTEM preamble (with HUMAN as its
# fallback role) followed by one HUMAN turn carrying the dataset's
# ``prompt`` column.
lcb_code_execution_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'begin': [
                {
                    'role': 'SYSTEM',
                    'fallback_role': 'HUMAN',
                    'prompt': 'You are an expert at Python programming, code execution, test case generation, and fuzzing.',
                },
            ],
            'round': [
                {'role': 'HUMAN', 'prompt': '{prompt}'},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 1024},
}

# Evaluation config for code execution; predictions are taken from the
# BOT role.
lcb_code_execution_eval_cfg = {
    'evaluator': {'type': LCBCodeExecutionEvaluator},
    'pred_role': 'BOT',
}

# Dataset entry that ties the reader, inference and evaluation configs
# together for the code-execution task.
LCBCodeExecution_dataset = {
    'type': LCBCodeExecutionDataset,
    'abbr': 'lcb_code_execution',
    'path': 'opencompass/execution-v2',
    'reader_cfg': lcb_code_execution_reader_cfg,
    'infer_cfg': lcb_code_execution_infer_cfg,
    'eval_cfg': lcb_code_execution_eval_cfg,
}

# Test Output Prediction Dataset
# Columns read for the test-output-prediction task.
lcb_test_output_reader_cfg = {
    'input_columns': ['prompt'],
    'output_column': 'evaluation_sample',
}

# System prompt text; same wording as SYSTEM_MESSAGE_GENERIC. In this file
# it is only mentioned by the commented-out SYSTEM turn of
# ``lcb_test_output_infer_cfg``.
system_prompt = (
    'You are an expert Python programmer. '
    'You will be given a question (problem specification) and will generate '
    'a correct Python program that matches the specification and passes all '
    'tests. You will NOT return anything except for the program.'
)

# Inference config for test-output prediction: a single HUMAN turn carrying
# the dataset's ``prompt`` column. The SYSTEM preamble is currently
# disabled (left commented out below).
lcb_test_output_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            # begin=[
            #     dict(
            #         role='SYSTEM',
            #         prompt=system_prompt
            #     ),
            # ],
            'round': [
                {'role': 'HUMAN', 'prompt': '{prompt}'},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 1024},
}

# Evaluation config for test-output prediction; predictions are taken from
# the BOT role.
lcb_test_output_eval_cfg = {
    'evaluator': {'type': LCBTestOutputEvaluator},
    'pred_role': 'BOT',
}

# Dataset entry that ties the reader, inference and evaluation configs
# together for the test-output-prediction task.
LCBTestOutput_dataset = {
    'type': LCBTestOutputPredictionDataset,
    'abbr': 'lcb_test_output',
    'path': 'opencompass/test_generation',
    'reader_cfg': lcb_test_output_reader_cfg,
    'infer_cfg': lcb_test_output_infer_cfg,
    'eval_cfg': lcb_test_output_eval_cfg,
}

# All LiveCodeBench dataset entries defined in this file, in order:
# code generation, code execution, test-output prediction.
LCB_datasets = [
    LCBCodeGeneration_dataset, LCBCodeExecution_dataset, LCBTestOutput_dataset
]
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from mmengine.config import read_base

with read_base():
    # Re-export the current LiveCodeBench dataset list. This variant
    # supersedes the earlier ``livecodebench_gen_b2b0fd`` config, whose
    # import would be shadowed by this one anyway.
    from .livecodebench_gen_6966bc import LCB_datasets  # noqa: F401, F403
164 changes: 164 additions & 0 deletions opencompass/configs/datasets/livecodebench/livecodebench_gen_6966bc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import (
LCBCodeGenerationDataset,
LCBCodeExecutionDataset,
LCBTestOutputPredictionDataset,
LCBCodeGenerationEvaluator,
LCBCodeExecutionEvaluator,
LCBTestOutputEvaluator
)
from opencompass.datasets.livecodebench import TestOutputPromptConstants


# Columns read for the code-generation task.
lcb_code_generation_reader_cfg = {
    'input_columns': ['question_content', 'format_prompt'],
    # Alternative kept for reference: output_column='evaluation_sample'
    'output_column': 'question_id',
}

# Generic system message for code generation. It is a plain string (no
# placeholders), so no f-string prefix is needed. Not referenced by the
# configs visible in this file.
SYSTEM_MESSAGE_GENERIC = (
    'You are an expert Python programmer. '
    'You will be given a question (problem specification) and will generate '
    'a correct Python program that matches the specification and passes all '
    'tests. You will NOT return anything except for the program.'
)

# HUMAN-turn template for code generation. ``{question_content}`` and
# ``{format_prompt}`` are left as literal placeholders here; they match the
# input columns declared in ``lcb_code_generation_reader_cfg``.
prompt_template = (
    '### Question:\n{question_content}\n\n{format_prompt}'
    '### Answer: (use the provided format with backticks)\n\n'
)


# Code Generation Tasks
# Inference config: one HUMAN turn whose text is ``prompt_template``,
# paired with ZeroRetriever and GenInferencer (max_out_len=1024).
lcb_code_generation_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': prompt_template},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 1024},
}

# Evaluation config for code generation: LCBCodeGenerationEvaluator with
# num_process_evaluate=4 and timeout=6 (units are defined by the evaluator);
# predictions are taken from the BOT role.
lcb_code_generation_eval_cfg = {
    'evaluator': {
        'type': LCBCodeGenerationEvaluator,
        'num_process_evaluate': 4,
        'timeout': 6,
    },
    'pred_role': 'BOT',
}

# Dataset entry that ties the reader, inference and evaluation configs
# together for the code-generation task.
LCBCodeGeneration_dataset = {
    'type': LCBCodeGenerationDataset,
    'abbr': 'lcb_code_generation',
    'path': 'opencompass/code_generation_lite',
    'reader_cfg': lcb_code_generation_reader_cfg,
    'infer_cfg': lcb_code_generation_infer_cfg,
    'eval_cfg': lcb_code_generation_eval_cfg,
}

# Code Execution Dataset
# Columns read for the code-execution task.
lcb_code_execution_reader_cfg = {
    'input_columns': ['prompt'],
    'output_column': 'evaluation_sample',
}

# Inference config for code execution: a SYSTEM preamble (with HUMAN as its
# fallback role) followed by one HUMAN turn carrying the dataset's
# ``prompt`` column.
lcb_code_execution_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'begin': [
                {
                    'role': 'SYSTEM',
                    'fallback_role': 'HUMAN',
                    'prompt': 'You are an expert at Python programming, code execution, test case generation, and fuzzing.',
                },
            ],
            'round': [
                {'role': 'HUMAN', 'prompt': '{prompt}'},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 1024},
}

# Evaluation config for code execution; predictions are taken from the
# BOT role.
lcb_code_execution_eval_cfg = {
    'evaluator': {'type': LCBCodeExecutionEvaluator},
    'pred_role': 'BOT',
}

# Dataset entry that ties the reader, inference and evaluation configs
# together for the code-execution task.
LCBCodeExecution_dataset = {
    'type': LCBCodeExecutionDataset,
    'abbr': 'lcb_code_execution',
    'path': 'opencompass/execution-v2',
    'reader_cfg': lcb_code_execution_reader_cfg,
    'infer_cfg': lcb_code_execution_infer_cfg,
    'eval_cfg': lcb_code_execution_eval_cfg,
}

# Test Output Prediction Dataset
# Columns read for the test-output-prediction task.
lcb_test_output_reader_cfg = {
    'input_columns': ['prompt'],
    'output_column': 'evaluation_sample',
}

# System prompt text; same wording as SYSTEM_MESSAGE_GENERIC. In this file
# it is only mentioned by the commented-out SYSTEM turn of
# ``lcb_test_output_infer_cfg``.
system_prompt = (
    'You are an expert Python programmer. '
    'You will be given a question (problem specification) and will generate '
    'a correct Python program that matches the specification and passes all '
    'tests. You will NOT return anything except for the program.'
)

# Inference config for test-output prediction: a single HUMAN turn carrying
# the dataset's ``prompt`` column. The SYSTEM preamble is currently
# disabled (left commented out below).
lcb_test_output_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            # begin=[
            #     dict(
            #         role='SYSTEM',
            #         prompt=system_prompt
            #     ),
            # ],
            'round': [
                {'role': 'HUMAN', 'prompt': '{prompt}'},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 1024},
}

# Evaluation config for test-output prediction; predictions are taken from
# the BOT role.
lcb_test_output_eval_cfg = {
    'evaluator': {'type': LCBTestOutputEvaluator},
    'pred_role': 'BOT',
}

# Dataset entry that ties the reader, inference and evaluation configs
# together for the test-output-prediction task.
LCBTestOutput_dataset = {
    'type': LCBTestOutputPredictionDataset,
    'abbr': 'lcb_test_output',
    'path': 'opencompass/test_generation',
    'reader_cfg': lcb_test_output_reader_cfg,
    'infer_cfg': lcb_test_output_infer_cfg,
    'eval_cfg': lcb_test_output_eval_cfg,
}

# All LiveCodeBench dataset entries defined in this file, in order:
# code generation, code execution, test-output prediction.
LCB_datasets = [
    LCBCodeGeneration_dataset, LCBCodeExecution_dataset, LCBTestOutput_dataset
]
15 changes: 15 additions & 0 deletions opencompass/configs/models/chatglm/lmdeploy_glm4_9b.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from opencompass.models import TurboMindModel

# Model list with a single entry: THUDM/glm-4-9b run through TurboMindModel
# on one GPU (tp=1, num_gpus=1).
models = [
    {
        'type': TurboMindModel,
        'abbr': 'glm-4-9b-turbomind',
        'path': 'THUDM/glm-4-9b',
        'engine_config': {'max_batch_size': 16, 'tp': 1},
        # NOTE(review): top_k=1 with a near-zero temperature presumably
        # amounts to greedy decoding; confirm against the backend docs.
        'gen_config': {
            'top_k': 1,
            'temperature': 1e-6,
            'top_p': 0.9,
            'max_new_tokens': 2048,
        },
        'max_seq_len': 8192,
        'max_out_len': 2048,
        'batch_size': 16,
        'run_cfg': {'num_gpus': 1},
    },
]
15 changes: 15 additions & 0 deletions opencompass/configs/models/qwen2_5/lmdeploy_qwen2_5_14b.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from opencompass.models import TurboMindModel

# Model list with a single entry: Qwen/Qwen2.5-14B run through
# TurboMindModel on two GPUs (tp=2, num_gpus=2).
models = [
    {
        'type': TurboMindModel,
        'abbr': 'qwen2.5-14b-turbomind',
        'path': 'Qwen/Qwen2.5-14B',
        # session_len is kept equal to max_seq_len below.
        'engine_config': {'session_len': 7168, 'max_batch_size': 16, 'tp': 2},
        # NOTE(review): top_k=1 with a near-zero temperature presumably
        # amounts to greedy decoding; confirm against the backend docs.
        'gen_config': {
            'top_k': 1,
            'temperature': 1e-6,
            'top_p': 0.9,
            'max_new_tokens': 1024,
        },
        'max_seq_len': 7168,
        'max_out_len': 1024,
        'batch_size': 16,
        'run_cfg': {'num_gpus': 2},
    },
]
15 changes: 15 additions & 0 deletions opencompass/configs/models/qwen2_5/lmdeploy_qwen2_5_32b.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from opencompass.models import TurboMindModel

# Model list with a single entry: Qwen/Qwen2.5-32B run through
# TurboMindModel on two GPUs (tp=2, num_gpus=2).
models = [
    {
        'type': TurboMindModel,
        'abbr': 'qwen2.5-32b-turbomind',
        'path': 'Qwen/Qwen2.5-32B',
        # session_len is kept equal to max_seq_len below.
        'engine_config': {'session_len': 7168, 'max_batch_size': 16, 'tp': 2},
        # NOTE(review): top_k=1 with a near-zero temperature presumably
        # amounts to greedy decoding; confirm against the backend docs.
        'gen_config': {
            'top_k': 1,
            'temperature': 1e-6,
            'top_p': 0.9,
            'max_new_tokens': 1024,
        },
        'max_seq_len': 7168,
        'max_out_len': 1024,
        'batch_size': 16,
        'run_cfg': {'num_gpus': 2},
    },
]
Loading
Loading