From 13f28ddce9d851cd125668f8d519d1a83266a5ae Mon Sep 17 00:00:00 2001 From: Vishnu Suresh Date: Mon, 25 Nov 2024 23:53:51 -0800 Subject: [PATCH 1/3] add override, final decorators to oss model handlers --- .../bfcl/model_handler/oss_model/base_oss_handler.py | 6 ++++-- .../bfcl/model_handler/oss_model/deepseek.py | 5 +++++ .../bfcl/model_handler/oss_model/deepseek_coder.py | 8 ++++++++ .../bfcl/model_handler/oss_model/gemma.py | 3 +++ .../bfcl/model_handler/oss_model/glaive.py | 4 +++- .../bfcl/model_handler/oss_model/glm.py | 5 +++++ .../bfcl/model_handler/oss_model/granite.py | 6 +++++- .../bfcl/model_handler/oss_model/hammer.py | 5 +++++ .../bfcl/model_handler/oss_model/hermes.py | 7 +++++++ .../bfcl/model_handler/oss_model/llama.py | 3 ++- .../bfcl/model_handler/oss_model/llama_fc.py | 6 ++++++ .../bfcl/model_handler/oss_model/minicpm.py | 3 +++ .../bfcl/model_handler/oss_model/phi.py | 4 +++- .../bfcl/model_handler/oss_model/qwen.py | 3 +++ .../bfcl/model_handler/oss_model/salesforce.py | 5 +++++ berkeley-function-call-leaderboard/pyproject.toml | 3 ++- 16 files changed, 69 insertions(+), 7 deletions(-) diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py index e6e93da57..c463dc912 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py @@ -3,6 +3,7 @@ import time import json from concurrent.futures import ThreadPoolExecutor +from overrides import EnforceOverrides, final import requests from bfcl.constant import RESULT_PATH, VERSION_PREFIX @@ -19,7 +20,7 @@ from tqdm import tqdm -class OSSHandler(BaseHandler): +class OSSHandler(BaseHandler, EnforceOverrides): def __init__(self, model_name, temperature, dtype="bfloat16") -> None: super().__init__(model_name, temperature) self.model_name_huggingface = model_name @@ -44,6 +45,7 @@ def decode_ast(self, result, language="Python"): def decode_execute(self, result): return default_decode_execute_prompting(result) + @final def batch_inference( self, test_entries: list[dict], @@ -218,6 +220,7 @@ def log_subprocess_output(pipe, stop_event): stdout_thread.join() stderr_thread.join() + @final def _multi_threaded_inference(self, test_case, include_input_log: bool, include_state_log: bool): """ This is a wrapper function to make sure that, if an error occurs during inference, the process does not stop. @@ -249,7 +252,6 @@ def _multi_threaded_inference(self, test_case, include_input_log: bool, include_ return result_to_write #### Prompting methods #### - def _format_prompt(self, messages, function): raise NotImplementedError( "OSS Models should implement their own prompt formatting." 
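The first patch builds on the `overrides` package (added to `pyproject.toml` further down in this same patch): mixing `EnforceOverrides` into a base class makes Python check each subclass at class-definition time, `@overrides` declares (and sanity-checks) an intentional override, and `@final` forbids overriding altogether. A minimal sketch of that behaviour, using illustrative names rather than the real BFCL classes:

```python
# Minimal sketch of the overrides-package pattern this patch series adopts.
# `Base`, `Concrete`, `run`, and `decode` are made-up names for illustration.
from overrides import EnforceOverrides, final, overrides


class Base(EnforceOverrides):
    @final
    def run(self, entry: dict) -> str:
        # Orchestration method: subclasses may call it but must not replace it.
        return self.decode(entry)

    def decode(self, entry: dict) -> str:
        # Hook that each concrete subclass is expected to override.
        raise NotImplementedError


class Concrete(Base):
    @overrides
    def decode(self, entry: dict) -> str:  # accepted: the override is declared
        return str(entry)


# Either of these subclass definitions raises TypeError as soon as the class
# body is evaluated:
#   class BadFinal(Base):
#       def run(self, entry): ...      # overrides a @final method
#   class BadSilent(Base):
#       def decode(self, entry): ...   # overrides without @overrides
```

In the hunks above, `OSSHandler` plays the `Base` role (it mixes `EnforceOverrides` in alongside `BaseHandler`), the individual model handlers play `Concrete`, and `batch_inference` / `_multi_threaded_inference` are the methods frozen with `@final`.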
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/deepseek.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/deepseek.py index f99a8d14a..03da82a2e 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/deepseek.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/deepseek.py @@ -1,4 +1,5 @@ from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler +from overrides import overrides class DeepseekHandler(OSSHandler): @@ -9,6 +10,7 @@ class DeepseekHandler(OSSHandler): def __init__(self, model_name, temperature) -> None: super().__init__(model_name, temperature) + @overrides def decode_ast(self, result, language="Python"): result = result.strip() if result.startswith("```json"): @@ -17,6 +19,7 @@ def decode_ast(self, result, language="Python"): result = result[len("```python"):] return super().decode_ast(result, language) + @overrides def decode_execute(self, result): if result.startswith("```json"): result = result[len("```json"):] @@ -24,6 +27,7 @@ def decode_execute(self, result): result = result[len("```python"):] return super().decode_execute(result) + @overrides def _format_prompt(self, messages, function): """ "bos_token": { @@ -58,6 +62,7 @@ def _format_prompt(self, messages, function): return formatted_prompt + @overrides def _add_execution_results_prompting( self, inference_data: dict, execution_results: list[str], model_response_data: dict ) -> dict: diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/deepseek_coder.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/deepseek_coder.py index 7d7c54041..58c99e7bb 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/deepseek_coder.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/deepseek_coder.py @@ -1,6 +1,8 @@ import json import re +from overrides import overrides + from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler from bfcl.model_handler.utils import ( combine_consecutive_user_prompts, @@ -18,12 +20,15 @@ class DeepseekCoderHandler(OSSHandler): def __init__(self, model_name, temperature) -> None: super().__init__(model_name, temperature) + @overrides def decode_ast(self, result, language="Python"): return result + @overrides def decode_execute(self, result): return convert_to_function_call(result) + @overrides def _format_prompt(self, messages, function): """ "bos_token": { @@ -105,6 +110,7 @@ def _format_prompt(self, messages, function): return formatted_prompt + @overrides def _pre_query_processing_prompting(self, test_entry: dict) -> dict: functions: list = test_entry["function"] test_category: str = test_entry["id"].rsplit("_", 1)[0] @@ -131,6 +137,7 @@ def _pre_query_processing_prompting(self, test_entry: dict) -> dict: return {"message": [], "function": functions} + @overrides def _parse_query_response_prompting(self, api_response: any) -> dict: model_responses = api_response.choices[0].text extracted_tool_calls = self.extract_tool_calls(model_responses) @@ -158,6 +165,7 @@ def _parse_query_response_prompting(self, api_response: any) -> dict: "output_token": api_response.usage.completion_tokens, } + @overrides def _add_assistant_message_prompting( self, inference_data: dict, model_response_data: dict ) -> dict: diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/gemma.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/gemma.py index 517d0206f..63a1b3175 100644 --- 
a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/gemma.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/gemma.py @@ -6,11 +6,13 @@ combine_consecutive_user_prompts, ) +from overrides import overrides class GemmaHandler(OSSHandler): def __init__(self, model_name, temperature) -> None: super().__init__(model_name, temperature) + @overrides def _format_prompt(self, messages, function): """ "bos_token": "", @@ -25,6 +27,7 @@ def _format_prompt(self, messages, function): return formatted_prompt + @overrides def _pre_query_processing_prompting(self, test_entry: dict) -> dict: functions: list = test_entry["function"] test_category: str = test_entry["id"].rsplit("_", 1)[0] diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glaive.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glaive.py index 93d54e215..cd0ee7cde 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glaive.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glaive.py @@ -1,12 +1,13 @@ from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler from bfcl.model_handler.utils import convert_to_function_call import json - +from overrides import overrides class GlaiveHandler(OSSHandler): def __init__(self, model_name, temperature) -> None: super().__init__(model_name, temperature) + @overrides def decode_ast(self, result, language="Python"): function_call = result.split("")[-1] function_call = function_call.replace("'", "") @@ -22,6 +23,7 @@ def decode_ast(self, result, language="Python"): decoded_result = [{decoded_function["name"]: decoded_function["arguments"]}] return decoded_result + @overrides def decode_execute(self, result): function_call = result.split("")[-1] function_call = function_call.replace("'", "") diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glm.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glm.py index b43088743..7cb67fbe5 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glm.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glm.py @@ -9,12 +9,14 @@ func_doc_language_specific_pre_processing, ) +from overrides import overrides class GLMHandler(OSSHandler): def __init__(self, model_name, temperature) -> None: super().__init__(model_name, temperature) self.stop_token_ids = [151329, 151336, 151338] + @overrides def _format_prompt(self, messages, function): """ "chat_template": "[gMASK]{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。{% elif tool['type'] == 'python' %}\n\n## python\n\n当你向 `python` 发送包含 Python 代码的消息时,该代码将会在一个有状态的 Jupyter notebook 环境中执行。\n`python` 返回代码执行的输出,或在执行 60 秒后返回超时。\n`/mnt/data` 将会持久化存储你的文件。在此会话中,`python` 无法访问互联网。不要使用 `python` 进行任何网络请求或者在线 API 调用,这些在线内容的访问将不会成功。{% elif tool['type'] == 'simple_browser' %}\n\n## simple_browser\n\n你可以使用 `simple_browser` 工具。该工具支持以下函数:\n`search(query: str, recency_days: int)`:使用搜索引擎进行查询并显示结果,可以使用 `recency_days` 参数控制搜索内容的时效性。\n`mclick(ids: list[int])`:获取一系列指定 id 的页面内容。每次调用时,须选择3-10个页面。选择多个角度的页面,同时尽可能选择可信任的信息来源。考虑到部分页面是无法加载的,你也可以多打开一些可能有用的页面而不用担心内容过多。\n`open_url(url: str)`:打开指定的 URL。\n\n使用 `【{引用 id}†{引用文本}】` 来引用内容。\n\n操作步骤:1. 
使用 `search` 来获得信息列表; 2. 使用 `mclick` 来获取指定 ID 页面的内容; 3. 根据获得的内容进行回复。在回复中应当引用信息来源。\n 如果用户提供了 URL,也可以用 `open_url` 直接打开页面。\n如果初次搜索结果没有找到合适的信息,也可以再次使用 `search` 进行搜索。{% elif tool['type'] == 'cogview' %}\n\n## cogview\n\n如果用户的请求中包含了对图像的描述,你可以使用 `cogview` 来生成图像并展示给用户。你需要向 `cogview` 发送图像描述,规则:\n- 发送给 `cogview` 的消息必须使用英语。用户的中文描述必须完全翻译为英语。\n- 应当尽可能详细地描述图像生成的需求,需求描述约 100 英文单词。\n- 保持用户原始描述的意图。不要虚构内容或者没见过的人物。\n- 如无特殊说明,所在地为中国,持有中国立场并遵循中国社会主义价值观。{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}", @@ -34,6 +36,7 @@ def _format_prompt(self, messages, function): return formatted_prompt + @overrides def _pre_query_processing_prompting(self, test_entry: dict) -> dict: functions: list = test_entry["function"] test_category: str = test_entry["id"].rsplit("_", 1)[0] @@ -44,6 +47,7 @@ def _pre_query_processing_prompting(self, test_entry: dict) -> dict: return {"message": [], "function": functions} + @overrides def decode_ast(self, result, language="Python"): args = result.split("\n") if len(args) == 1: @@ -52,6 +56,7 @@ def decode_ast(self, result, language="Python"): func = [{args[0]: json.loads(args[1])}] return func + @overrides def decode_execute(self, result): args = result.split("\n") if len(args) == 1: diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/granite.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/granite.py index 130216b78..7c4daa357 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/granite.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/granite.py @@ -8,11 +8,12 @@ func_doc_language_specific_pre_processing, ) - +from overrides import overrides class GraniteHandler(OSSHandler): def __init__(self, model_name, temperature) -> None: super().__init__(model_name, temperature) + @overrides def _format_prompt(self, messages, function): """ "chat_template": "{% set function_str = messages.get('functions_str', {}) %}\n{% set query = messages['query'] %}\n{% set sys_prompt = 'You are a helpful assistant with access to the following function calls. Your task is to produce a sequence of function calls necessary to generate response to the user utterance. Use the following function calls as required. 
' %}\n{% set funcstr = function_str|join('\n') %}\n{{ 'SYSTEM: ' + sys_prompt + '\n<|function_call_library|>\n' + funcstr + '\n\nIf none of the functions are relevant or the given question lacks the parameters required by the function, please output \" {\"name\": \"no_function\", \"arguments\": {}}\".\n\nUSER: ' + query}}\n{% if add_generation_prompt %}\n{{ 'ASSISTANT:' }}{% endif %}", @@ -40,6 +41,7 @@ def _format_prompt(self, messages, function): return prompt_str + @overrides def _pre_query_processing_prompting(self, test_entry: dict) -> dict: functions: list = test_entry["function"] test_category: str = test_entry["id"].rsplit("_", 1)[0] @@ -50,6 +52,7 @@ def _pre_query_processing_prompting(self, test_entry: dict) -> dict: return {"message": [], "function": functions} + @overrides def decode_ast(self, result, language="Python"): decoded_outputs = [] result = [ @@ -75,6 +78,7 @@ def decode_ast(self, result, language="Python"): return decoded_outputs + @overrides def decode_execute(self, result): decoded_outputs = [] result = [ diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hammer.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hammer.py index 02d9628c9..4df3454e3 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hammer.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hammer.py @@ -6,6 +6,7 @@ func_doc_language_specific_pre_processing, ) +from overrides import overrides TASK_INSTRUCTION = """You are a tool calling assistant. In order to complete the user's request, you need to select one or more appropriate tools from the following tools and fill in the correct values for the tool parameters. Your specific tasks are: 1. Make one or more function/tool calls to meet the request based on the question. 2. If none of the function can be used, point it out and refuse to answer. @@ -28,6 +29,7 @@ class HammerHandler(OSSHandler): def __init__(self, model_name, temperature) -> None: super().__init__(model_name, temperature) + @overrides def _format_prompt(self, messages, function): """ "chat_template": "{% set system_message = 'You are a helpful assistant.' 
%}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}", @@ -86,6 +88,7 @@ def convert_to_format_tool(tools): return f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n" + @overrides def decode_ast(self, result, language="Python"): result = result.replace("```", "") try: @@ -126,6 +129,7 @@ def xlam_json_to_python_tool_calls(tool_calls): return python_format + @overrides def decode_execute(self, result): result = result.replace("```", "") try: @@ -142,6 +146,7 @@ def decode_execute(self, result): function_call = self.xlam_json_to_python_tool_calls(tool_calls) return function_call + @overrides def _pre_query_processing_prompting(self, test_entry: dict) -> dict: functions: list = test_entry["function"] test_category: str = test_entry["id"].rsplit("_", 1)[0] diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hermes.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hermes.py index 46f25a66e..750663dbf 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hermes.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hermes.py @@ -2,6 +2,8 @@ from bfcl.model_handler.utils import convert_to_tool, func_doc_language_specific_pre_processing from bfcl.model_handler.constant import GORILLA_TO_OPENAPI from bfcl.model_handler.model_style import ModelStyle + +from overrides import overrides import json import inspect @@ -13,6 +15,7 @@ def __init__(self, model_name, temperature) -> None: if model_name == "NousResearch/Hermes-2-Pro-Llama-3-8B": self.dtype = "float16" + @overrides def _format_prompt(self, messages, function): # Hermes use Langchain to OpenAI conversion. It does not use tool call but function call. 
function = convert_to_tool(function, GORILLA_TO_OPENAPI, ModelStyle.OSSMODEL) @@ -49,6 +52,7 @@ def _format_prompt(self, messages, function): return formatted_prompt + @overrides def decode_ast(self, result, language="Python"): lines = result.split("\n") flag = False @@ -66,6 +70,7 @@ def decode_ast(self, result, language="Python"): flag = False return func_call + @overrides def decode_execute(self, result): lines = result.split("\n") flag = False @@ -91,6 +96,7 @@ def decode_execute(self, result): ) return execution_list + @overrides def _pre_query_processing_prompting(self, test_entry: dict) -> dict: functions: list = test_entry["function"] test_category: str = test_entry["id"].rsplit("_", 1)[0] @@ -101,6 +107,7 @@ def _pre_query_processing_prompting(self, test_entry: dict) -> dict: return {"message": [], "function": functions} + @overrides def _add_execution_results_prompting( self, inference_data: dict, execution_results: list[str], model_response_data: dict ) -> dict: diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama.py index bad3c9731..c793419a6 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama.py @@ -1,5 +1,5 @@ from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler - +from overrides import overrides # Note: This is the handler for the Llama models in prompring mode. # For function call mode, use LlamaFCHandler instead. @@ -8,6 +8,7 @@ class LlamaHandler(OSSHandler): def __init__(self, model_name, temperature) -> None: super().__init__(model_name, temperature) + @overrides def _format_prompt(self, messages, function): formatted_prompt = "<|begin_of_text|>" diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama_fc.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama_fc.py index 0e1719910..d6821b213 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama_fc.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama_fc.py @@ -3,6 +3,8 @@ from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler from bfcl.model_handler.utils import func_doc_language_specific_pre_processing +from overrides import overrides + # TODO: Merge with LlamaHandler @@ -11,6 +13,7 @@ def __init__(self, model_name, temperature) -> None: super().__init__(model_name, temperature) self.model_name_huggingface = model_name.replace("-FC", "") + @overrides def _format_prompt(self, messages, function): """ "bos_token": "<|begin_of_text|>", @@ -173,6 +176,7 @@ def _format_prompt(self, messages, function): return formatted_prompt + @overrides def decode_ast(self, result, language="Python"): result = result.replace("<|python_tag|>", "") # Llama sometimes separates the function calls with `;` and sometimes with `,` @@ -198,6 +202,7 @@ def decode_ast(self, result, language="Python"): return decoded_output + @overrides def decode_execute(self, result): result = result.replace("<|python_tag|>", "") # Llama sometimes separates the function calls with `;` and sometimes with `,` @@ -219,6 +224,7 @@ def decode_execute(self, result): return execution_list + @overrides def _pre_query_processing_prompting(self, test_entry: dict) -> dict: functions: list = test_entry["function"] test_category: str = test_entry["id"].rsplit("_", 1)[0] diff --git 
a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py index 521492598..cd0437350 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py @@ -1,9 +1,12 @@ from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler +from overrides import overrides + class MiniCPMHandler(OSSHandler): def __init__(self, model_name, temperature) -> None: super().__init__(model_name, temperature) + @overrides def _format_prompt(self, messages, function): """ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}" diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/phi.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/phi.py index 0aa7a0c10..4b75912dd 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/phi.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/phi.py @@ -6,11 +6,12 @@ system_prompt_pre_processing_chat_model, ) - +from overrides import overrides class PhiHandler(OSSHandler): def __init__(self, model_name, temperature) -> None: super().__init__(model_name, temperature) + @overrides def _format_prompt(self, messages, function): if "Phi-3-small" in self.model_name: # Phi-3-small @@ -35,6 +36,7 @@ def _format_prompt(self, messages, function): return formatted_prompt + @overrides def _pre_query_processing_prompting(self, test_entry: dict) -> dict: functions: list = test_entry["function"] test_category: str = test_entry["id"].rsplit("_", 1)[0] diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py index 4118b266c..5b40a932c 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py @@ -1,10 +1,13 @@ from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler +from overrides import overrides + class QwenHandler(OSSHandler): def __init__(self, model_name, temperature) -> None: super().__init__(model_name, temperature) + @overrides def _format_prompt(self, messages, function): # Qwen is using its prompting mode, not the tool use mode """ diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/salesforce.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/salesforce.py index 2addde371..17b065695 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/salesforce.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/salesforce.py @@ -5,6 +5,7 @@ from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler from openai import OpenAI +from overrides import overrides class SalesforceHandler(OSSHandler): def __init__(self, model_name, temperature) -> None: @@ -14,6 +15,7 @@ def __init__(self, model_name, temperature) -> None: config = xLAMConfig(base_url=f"http://localhost:{VLLM_PORT}/v1/", model=self.model_name) self.client = xLAMChatCompletion.from_config(config) + @overrides def decode_ast(self, result, language="Python"): decoded_output = [] for invoked_function in result: @@ -22,6 +24,7 @@ def decode_ast(self, result, language="Python"): 
decoded_output.append({name: params}) return decoded_output + @overrides def decode_execute(self, result): if isinstance(result, list): tool_calls = result @@ -32,6 +35,7 @@ def decode_execute(self, result): function_call = self.xlam_json_to_python_tool_calls(tool_calls) return function_call + @overrides def _parse_query_response_prompting(self, api_response: any) -> dict: if api_response["choices"][0]["message"]["tool_calls"] != []: return { @@ -99,6 +103,7 @@ def convert_to_dict(self, input_str): return result_list + @overrides def _query_prompting(self, inference_data: dict): function: list[dict] = inference_data["function"] message: list[dict] = inference_data["message"] diff --git a/berkeley-function-call-leaderboard/pyproject.toml b/berkeley-function-call-leaderboard/pyproject.toml index 705a7538f..4b2e803fe 100644 --- a/berkeley-function-call-leaderboard/pyproject.toml +++ b/berkeley-function-call-leaderboard/pyproject.toml @@ -30,7 +30,8 @@ dependencies = [ "tabulate>=0.9.0", "google-cloud-aiplatform==1.72.0", "mpmath==1.3.0", - "tenacity==9.0.0" + "tenacity==9.0.0", + "overrides==7.7.0" ] [project.scripts] From 0ef0100d3bd2a9a131fe48e160fd790a777ce6c9 Mon Sep 17 00:00:00 2001 From: Pan Yinxu Date: Tue, 26 Nov 2024 15:10:28 +0800 Subject: [PATCH 2/3] Add minicpm3 4b FC model handler (#718) Add new model `openbmb/MiniCPM3-4B-FC` to the leaderboard. --------- Co-authored-by: Huanzhi (Hans) Mao --- .../CHANGELOG.md | 1 + berkeley-function-call-leaderboard/README.md | 3 +- .../bfcl/eval_checker/model_metadata.py | 21 +- .../bfcl/model_handler/constant.py | 1 + .../bfcl/model_handler/handler_map.py | 2 + .../oss_model/base_oss_handler.py | 8 +- .../bfcl/model_handler/oss_model/minicpm.py | 4 +- .../model_handler/oss_model/minicpm_fc.py | 341 ++++++++++++++++++ .../pyproject.toml | 3 +- 9 files changed, 372 insertions(+), 12 deletions(-) create mode 100644 berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py diff --git a/berkeley-function-call-leaderboard/CHANGELOG.md b/berkeley-function-call-leaderboard/CHANGELOG.md index 0995ec2ff..e9df86b6e 100644 --- a/berkeley-function-call-leaderboard/CHANGELOG.md +++ b/berkeley-function-call-leaderboard/CHANGELOG.md @@ -2,6 +2,7 @@ All notable changes to the Berkeley Function Calling Leaderboard will be documented in this file. +- [Nov 25, 2024] [#718](https://github.com/ShishirPatil/gorilla/pull/718): Add new model `openbmb/MiniCPM3-4B-FC` to the leaderboard. - [Nov 25, 2024] [#697](https://github.com/ShishirPatil/gorilla/pull/697): Add the following new models to the leaderboard: - `deepseek-ai/DeepSeek-V2.5` - `deepseek-ai/DeepSeek-Coder-V2-Instruct-0724` diff --git a/berkeley-function-call-leaderboard/README.md b/berkeley-function-call-leaderboard/README.md index 5d54bd854..add6a514e 100644 --- a/berkeley-function-call-leaderboard/README.md +++ b/berkeley-function-call-leaderboard/README.md @@ -219,7 +219,8 @@ Below is _a table of models we support_ to run our leaderboard evaluation agains |Qwen/Qwen2.5-{1.5B,7B}-Instruct 💻| Prompt| |Qwen/Qwen2-{1.5B,7B}-Instruct 💻| Prompt| |Team-ACE/ToolACE-8B 💻| Function Calling| -|openbmb/MiniCPM3-4B 💻| Function Calling| +|openbmb/MiniCPM3-4B-FC 💻| Function Calling| +|openbmb/MiniCPM3-4B 💻| Prompt| |BitAgent/GoGoAgent 💻| Prompt| Here {MODEL} 💻 means the model needs to be hosted locally and called by vllm, {MODEL} means the models that are called API calls. For models with a trailing `-FC`, it means that the model supports function-calling feature. 
You can check out the table summarizing feature supports among different models [here](https://gorilla.cs.berkeley.edu/blogs/8_berkeley_function_calling_leaderboard.html#prompt). diff --git a/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py b/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py index 4ff77a356..74a134b77 100644 --- a/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py +++ b/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py @@ -686,7 +686,13 @@ "Apache-2.0", ], "openbmb/MiniCPM3-4B": [ - "MiniCPM3-4B (FC)", + "MiniCPM3-4B (Prompt)", + "https://huggingface.co/openbmb/MiniCPM3-4B", + "openbmb", + "Apache-2.0", + ], + "openbmb/MiniCPM3-4B-FC": [ + "MiniCPM3-4B-FC (FC)", "https://huggingface.co/openbmb/MiniCPM3-4B", "openbmb", "Apache-2.0", @@ -747,10 +753,10 @@ "gemini-1.5-pro-002-FC": 1.25, "gemini-1.5-pro-001": 1.25, "gemini-1.5-pro-001-FC": 1.25, - "gemini-1.5-flash-002": 0.075 , - "gemini-1.5-flash-002-FC": 0.075 , - "gemini-1.5-flash-001": 0.075 , - "gemini-1.5-flash-001-FC": 0.075 , + "gemini-1.5-flash-002": 0.075, + "gemini-1.5-flash-002-FC": 0.075, + "gemini-1.5-flash-001": 0.075, + "gemini-1.5-flash-001-FC": 0.075, "gemini-1.0-pro-002": 0.5, "gemini-1.0-pro-002-FC": 0.5, "databricks-dbrx-instruct": 2.25, @@ -826,10 +832,9 @@ # The latency of the open-source models are hardcoded here. # Because we do batching when generating the data, so the latency is not accurate from the result data. # This is the latency for the whole batch of data, when using 8 V100 GPUs. -OSS_LATENCY = { -} +OSS_LATENCY = {} -# All OSS models will have no cost shown on the leaderboard. +# All OSS models will have no cost shown on the leaderboard. NO_COST_MODELS = list(local_inference_handler_map.keys()) # The following models will also have no cost, even though they are queries through the API. 
NO_COST_MODELS += [ diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/constant.py b/berkeley-function-call-leaderboard/bfcl/model_handler/constant.py index d877c9b4d..5f7c25761 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/constant.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/constant.py @@ -150,4 +150,5 @@ "THUDM/glm-4-9b-chat", "ibm-granite/granite-20b-functioncalling", "yi-large-fc", + "openbmb/MiniCPM3-4B-FC", ] diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py index 962a98965..d75850ac3 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py @@ -9,6 +9,7 @@ from bfcl.model_handler.oss_model.llama import LlamaHandler from bfcl.model_handler.oss_model.llama_fc import LlamaFCHandler from bfcl.model_handler.oss_model.minicpm import MiniCPMHandler +from bfcl.model_handler.oss_model.minicpm_fc import MiniCPMFCHandler from bfcl.model_handler.oss_model.phi import PhiHandler from bfcl.model_handler.oss_model.qwen import QwenHandler from bfcl.model_handler.oss_model.salesforce import SalesforceHandler @@ -132,6 +133,7 @@ "Qwen/Qwen2.5-72B-Instruct": QwenHandler, "Team-ACE/ToolACE-8B": LlamaHandler, "openbmb/MiniCPM3-4B": MiniCPMHandler, + "openbmb/MiniCPM3-4B-FC": MiniCPMFCHandler, "deepseek-ai/DeepSeek-V2.5": DeepseekCoderHandler, "deepseek-ai/DeepSeek-Coder-V2-Instruct-0724": DeepseekCoderHandler, "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": DeepseekCoderHandler, diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py index c463dc912..c16c98c9b 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py @@ -275,13 +275,19 @@ def _query_prompting(self, inference_data: dict): else: leftover_tokens_count = min(4096, self.max_context_length - input_token_count - 2) + extra_body = {} if hasattr(self, "stop_token_ids"): + extra_body["stop_token_ids"] = self.stop_token_ids + if hasattr(self, "skip_special_tokens"): + extra_body["skip_special_tokens"] = self.skip_special_tokens + + if len(extra_body) > 0: api_response = self.client.completions.create( model=self.model_name_huggingface, temperature=self.temperature, prompt=formatted_prompt, max_tokens=leftover_tokens_count, - extra_body={"stop_token_ids": self.stop_token_ids}, + extra_body=extra_body, ) else: api_response = self.client.completions.create( diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py index cd0437350..7d08e67a7 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py @@ -14,7 +14,9 @@ def _format_prompt(self, messages, function): formatted_prompt = "" for message in messages: - formatted_prompt += f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>\n" + formatted_prompt += ( + f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>\n" + ) formatted_prompt += f"<|im_start|>assistant\n" diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py 
b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py new file mode 100644 index 000000000..c08027722 --- /dev/null +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py @@ -0,0 +1,341 @@ +import ast +import json +from typing import Dict, List + +import datamodel_code_generator +from bfcl.model_handler.constant import GORILLA_TO_OPENAPI +from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler +from bfcl.model_handler.utils import ( + convert_to_tool, + func_doc_language_specific_pre_processing, + resolve_ast_call, +) +from datamodel_code_generator import DataModelType +from datamodel_code_generator.model import get_data_model_types +from datamodel_code_generator.parser.jsonschema import JsonSchemaParser +from overrides import overrides + + +class MiniCPMFCHandler(OSSHandler): + def __init__(self, model_name, temperature) -> None: + super().__init__(model_name, temperature) + self.stop_token_ids = [2, 73440] + self.skip_special_tokens = False + self.model_name_huggingface = model_name.replace("-FC", "") + + @overrides + def _format_prompt(self, messages, function): + """ + "chat_template": "{%- macro json_to_python_type(param_name, json_spec) %}\n{%- set basic_type_map = {\n 'string': 'str',\n 'number': 'float',\n 'integer': 'int',\n 'boolean': 'bool',\n 'null': 'None'\n} %}\n\n{%- if json_spec.enum %}\n {{- param_name|title }}\n{%- elif basic_type_map[json_spec.type] is defined %}\n {{- basic_type_map[json_spec.type] }}\n{%- elif json_spec.type == 'array' %}\n {{- 'List[' + json_to_python_type(param_name, json_spec['items']) + ']' }}\n{%- elif json_spec.type == 'object' %}\n {{- 'Dict[str, ' + json_to_python_type(param_name, json_spec.additionalProperties if json_spec.additionalProperties else 'Any') + ']' if not json_spec.properties else param_name|title }}\n{%- elif json_spec.type is iterable %}\n {{- 'Union[' }}\n {%- for t in json_spec.type %}\n {{- json_to_python_type(param_name, {'type': t}) }}\n {{- ', ' if not loop.last }}\n {%- endfor %}\n {{- ']' }}\n{%- else %}\n {{- 'Any' }}\n{%- endif %}\n{%- endmacro %}\n\n{%- macro object_to_fields(json_spec, field_indent) %}\n {%- set o_ns = namespace(f = caller()) %}\n {%- for param_name, param_fields in json_spec.properties|items %}\n {%- if param_fields.enum %}\n {{- '\\n\\nclass ' + param_name|title + '(Enum):\\n' }}\n {%- for enum_option in param_fields.enum %}\n {{- ' enum_' + loop.index0|string + ' = ' + enum_option|tojson + '\\n' }}\n {%- endfor %}\n {%- elif param_fields.type == 'object' and param_fields.properties %}\n {%- call object_to_fields(param_fields, ' ') %}\n {{- '\\n\\nclass ' + param_name|title + '(BaseModel):\\n' }}\n {%- endcall %}\n {%- elif param_fields.type == 'array' and param_fields['items'] and param_fields['items'].type == 'object' and param_fields['items'].properties %}\n {%- call object_to_fields(param_fields['items'], ' ') %}\n {{- '\\n\\nclass ' + param_name|title + '(BaseModel):\\n' }}\n {%- endcall %}\n {%- endif %}\n {%- set param_default = param_fields.default|tojson if param_fields.default is string else param_fields.default|string if param_fields.default is defined else 'None' %}\n {%- set o_ns.f = o_ns.f + field_indent + param_name + ': ' %}\n {%- set o_ns.f = o_ns.f + ('Optional[' + json_to_python_type(param_name, param_fields) + ']' if param_name not in json_spec.required else json_to_python_type(param_name, param_fields)) %}\n {%- if not param_fields.title and not param_fields.description and not param_fields.pattern %}\n {%- set 
o_ns.f = o_ns.f + (' = ' + param_default if param_name not in json_spec.required else '') %}\n {%- else %}\n {%- set o_ns.f = o_ns.f + (' = Field(...' if param_name in json_spec.required else ' = Field(' + param_default) %}\n {%- set o_ns.f = o_ns.f + (', description=' + param_fields.description|tojson if param_fields.description else '') %}\n {%- set o_ns.f = o_ns.f + (', regex=' + param_fields.pattern|tojson if param_fields.pattern else '') %}\n {%- set o_ns.f = o_ns.f + (', title=' + param_fields.title|tojson if param_fields.title else '') %}\n {%- set o_ns.f = o_ns.f + ')' %}\n {%- endif %}\n {%- set o_ns.f = o_ns.f + '\\n' %}\n {%- endfor %}\n {{- o_ns.f }}\n{%- endmacro %}\n\n{%- macro tool_parser(tools) %}\n{%- for tool in tools %}\n {%- if tool.type is not defined or tool.type == 'function' %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {%- set tool_params = tool.parameters if tool.parameters is defined else none %}\n {%- call object_to_fields(tool_params, ' ') %}\n {{- '\\n\\ndef ' + tool.name + '(' }}\n {%- if tool_params %}\n {%- for param_name, param_fields in tool_params.properties|items %}\n {%- set param_default = param_fields.default|tojson if param_fields.default is string else param_fields.default|string if param_fields.default is defined else 'None' %}\n {{- ', ' if loop.index0 != 0 }}\n {{- param_name }}\n {{- '=' + param_default if param_name not in tool_params.required }}\n {%- endfor %}\n {%- endif %}\n {{- '):\\n \"\"\"' }}\n {{- tool.description }}\n {{- '\\n\\n Args:\\n' if tool_params else '\\n' }}\n {%- endcall %}\n {{- ' \"\"\"\\n' }}\n {%- endif %}\n{%- endfor %}\n{%- endmacro %}\n\n{%- if messages[0]['role'] == 'system' %}\n {%- set loop_messages = messages[1:] %}\n {%- set system_message = messages[0]['content'] %}\n{%- else %}\n {%- set loop_messages = messages %}\n {%- set system_message = '' %}\n{%- endif %}\n{{- '<|im_start|>system\\n' + system_message if system_message or tools }}\n{%- if tools %}\n {{- '\\n# Functions\\nHere is a list of functions that you can invoke:\\n```python\\nfrom enum import Enum\\nfrom typing import List, Dict, Optional\\nfrom pydantic import BaseModel, Field\\n\\n' }}\n {{- tool_parser(tools) }}\n {{- \"\\n```\\n\\n# Function Call Rule and Output Format\\n- If the user's question can be answered without calling any function, please answer the user's question directly. In this situation, you should return your thought and answer the user's question directly.\\n- If the user cannot be answered without calling any function, and the user does not provide enough information to call functions, please ask the user for more information. In this situation, you should return your thought and ask the user for more information.\\n- If the user's question cannot be answered without calling any function, and the user has provided enough information to call functions to solve it, you should call the functions. 
In this situation, the assistant should return your thought and call the functions.\\n- Use default parameters unless the user has specified otherwise.\\n- You should answer in the following format:\\n\\n<|thought_start|>\\n{explain why the user's question can be answered without calling a function or why you should ask the user for more information or why you should call one or more functions and your plan to solve the user's question.}\\n<|thought_end|>\\n<|tool_call_start|>\\n```python\\nfunc1(params_name=params_value, params_name2=params_value2...)\\nfunc2(params)\\n```\\n<|tool_call_end|>\\n{answer the user's question directly or ask the user for more information}\" }}\n{%- endif %}\n{{- '<|im_end|>\\n' if system_message or tools }}\n{%- for message in loop_messages %}\n {%- set content = message.content %}\n {%- if message.role == 'assistant' and message.tool_calls %}\n {{- '<|im_start|>' + message.role + '\\n' }}\n {{- '<|thought_start|>\\n' + message.thought + '\\n<|thought_end|>\\n' if message.thought }}\n {{- '<|tool_call_start|>\\n```python\\n' }}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- tool_call.name + '(' }}\n {%- if tool_call.arguments is defined and tool_call.arguments|length > 0 %}\n {%- for param_name, param_value in tool_call.arguments|items %}\n {{- param_name + '=' + param_value|tojson }}\n {{- ',' if not loop.last }}\n {%- endfor %}\n {%- endif %}\n {{- ')\\n' }}\n {%- endfor %}\n {{- '```\\n<|tool_call_end|>\\n' }}\n {{- content if content and not content.startswith('<|tool_call_start|>') }}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == 'assistant' and message.thought %}\n {{- '<|im_start|>' + message.role + '\\n' + '<|thought_start|>\\n' + message.thought + '\\n<|thought_end|>\\n' + content + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endfor %}\n\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}" + """ + tools = convert_to_tool(function, GORILLA_TO_OPENAPI, self.model_style) + + formated_messages = minicpm_input_format( + messages=messages, tools=tools, model_name=self.model_name + ) + formatted_prompt = "" + for message in formated_messages: + formatted_prompt += ( + f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>\n" + ) + + formatted_prompt += "<|im_start|>assistant\n" + return formatted_prompt + + @overrides + def _pre_query_processing_prompting(self, test_entry: dict) -> dict: + functions: list = test_entry["function"] + test_category: str = test_entry["id"].rsplit("_", 1)[0] + + functions = func_doc_language_specific_pre_processing(functions, test_category) + + # MiniCPM use its own system prompt in FC mode + + return {"message": [], "function": functions} + + @overrides + def decode_ast(self, result, language="Python"): + msg = fc2dict(result) + if ( + "tool_calls" in msg + and msg["tool_calls"] is not None + and len(msg["tool_calls"]) > 0 + ): + return [ + {tool_call["name"]: tool_call["arguments"]} + for tool_call in msg["tool_calls"] + ] + else: + return msg["content"] + + @overrides + def decode_execute(self, result): + msg = fc2dict(result) + if ( + "tool_calls" in msg + and msg["tool_calls"] is not None + and len(msg["tool_calls"]) > 0 + ): + execution_list = [] + for tool_call in msg["tool_calls"]: + func_name = tool_call["name"] + args_str = ", ".join( + f"{k}={repr(v)}" for k, v in 
tool_call["arguments"].items() + ) + execution_list.append(f"{func_name}({args_str})") + return execution_list + else: + return [] + + +def message_format(msg, system_suffix="", user_prefix=""): + if "thought" in msg and msg["thought"] is not None and len(msg["thought"]) > 0: + thought_prefix = f"<|thought_start|>\n{msg['thought']}\n<|thought_end|>\n" + else: + thought_prefix = "" + if msg["role"] == "assistant": + content = msg.get("content", "") + if content is None: + content = "" + if ( + "tool_calls" in msg + and msg["tool_calls"] is not None + and len(msg["tool_calls"]) > 0 + ): + + def add_quotes(variable): + if isinstance(variable, str): + return repr(variable) + else: + return str(variable) + + tool_calls = [] + for tool_call in msg["tool_calls"]: + if tool_call is None: + continue + tool_name = tool_call["name"] + if "arguments" not in tool_call or tool_call["arguments"] is None: + continue + if isinstance(tool_call["arguments"], str): + try: + tool_call["arguments"] = json.loads(tool_call["arguments"]) + except: + continue + args = ",".join( + [k + "=" + add_quotes(v) for k, v in tool_call["arguments"].items()] + ) + tool_calls.append(f"{tool_name}({args})") + + content = ( + thought_prefix + + "<|tool_call_start|>\n```python\n" + + "\n".join(tool_calls).strip() + + "\n```\n<|tool_call_end|>\n" + + content + ) + msg["content"] = content + else: + content = thought_prefix + content + msg["content"] = content + elif msg["role"] == "user": + msg["content"] = user_prefix + "\n" + msg["content"] + elif msg["role"] == "system": + msg["content"] = msg["content"] + "\n" + system_suffix + msg["content"] = msg["content"].strip() + return msg + + +def jsonschema_to_code(jsonschema: dict) -> str: + input_text = json.dumps(jsonschema) + if datamodel_code_generator.get_version() < "0.26.2": + from datamodel_code_generator.format import PythonVersion + + data_model_types = get_data_model_types( + DataModelType.PydanticBaseModel, + target_python_version=PythonVersion.PY_310, + ) + else: + from datamodel_code_generator.format import DatetimeClassType, PythonVersion + + data_model_types = get_data_model_types( + DataModelType.PydanticBaseModel, + target_python_version=PythonVersion.PY_310, + target_datetime_class=DatetimeClassType.Datetime, + ) + parser = JsonSchemaParser( + source=input_text, + data_model_type=data_model_types.data_model, + data_model_root_type=data_model_types.root_model, + data_model_field_type=data_model_types.field_model, + data_type_manager_type=data_model_types.data_type_manager, + target_python_version=PythonVersion.PY_311, + dump_resolve_reference_action=data_model_types.dump_resolve_reference_action, + field_constraints=True, + ) + results = parser.parse() + return results + + +def transform_function(function: dict): + """turn json format of function into signature""" + params, default_params = [], [] + for prop_name, prop in function["parameters"]["properties"].items(): + if "default" in prop: + default_params.append(f'{prop_name}={repr(prop["default"])}') + elif prop_name not in function["parameters"].get("required", []): + default_params.append(f"{prop_name}={repr(None)}") + else: + params.append(prop_name) + ps = ", ".join(params + default_params) + res = "def {f_name}({ps}):\n".format(f_name=function["name"], ps=ps) + f_des = function.get("description", "") + content = jsonschema_to_code(function["parameters"]) + if "class" in content: + i = content.index("class") + content = content[i:] + classes, args = content.split("class Model(BaseModel):", 1) + lint_msg = f' 
"""\n {f_des}\n Args:\n{args}\n """\n' + res += lint_msg + if len(classes) > 0: + res = classes + res + return res + + +def minicpm_input_format( + messages: List[Dict], + tools: List[Dict], + add_to_system=True, + model_name="openbmb/MiniCPM3-4B", +): + """ + Process the input messages, global_arguments, tools, tool_choice, + and convert it into a input string. + The global arguments and tools can not be both empty. + parameters: + messages: List[Dict] + the input messages + For example: + tools: List[Dict] + the tools list you can use + For example: + """ + if tools is not None and len(tools) > 0: + header = "from enum import Enum\nfrom typing import List, Dict, Optional\nfrom pydantic import BaseModel, Field\n\n" + tools_string = header + for tool in tools: + try: + tools_string += "\n\n" + transform_function(tool) + except: + pass + # print(traceback.format_exc()) + tools_template = """# Functions +Here is a list of functions that you can invoke: +```python +{tools} +``` + +# Function Call Rule and Output Format +- If the user's question can be answered without calling any function, please answer the user's question directly. In this situation, you should return your thought and answer the user's question directly. +- If the user cannot be answered without calling any function, and the user does not provide enough information to call functions, please ask the user for more information. In this situation, you should return your thought and ask the user for more information. +- If the user's question cannot be answered without calling any function, and the user has provided enough information to call functions to solve it, you should call the functions. In this situation, the assistant should return your thought and call the functions. +- Use default parameters unless the user has specified otherwise. +- You should answer in the following format: + +<|thought_start|> +{{explain why the user's question can be answered without calling a function or why you should ask the user for more information or why you should call one or more functions and your plan to solve the user's question.}} +<|thought_end|> +<|tool_call_start|> +```python +func1(params_name=params_value, params_name2=params_value2...) 
+func2(params) +``` +<|tool_call_end|> +{{answer the user's question directly or ask the user for more information}} +""" + tools_string = tools_template.format(tools=tools_string).strip() + else: + tools_string = "" + + if add_to_system: + if len(messages) > 0 and messages[0]["role"] != "system": + messages.insert(0, {"role": "system", "content": ""}) + return [ + message_format(msg, system_suffix=tools_string, user_prefix="") + for msg in messages + ] + else: + return [ + message_format(msg, system_suffix="", user_prefix=tools_string) + for msg in messages + ] + + +def convert_function_call_to_json(string): + try: + tool_calls = [] + x = ast.parse(string) + for tool in x.body: + function_name = tool.value.func.id + function_args = {} + for kw in tool.value.keywords: + function_args[kw.arg] = ast.literal_eval(kw.value) + this_one = {"name": function_name, "arguments": function_args} + tool_calls.append(this_one) + return tool_calls + except Exception: + return [] + + +def fc2dict( + sequence: str, + tool_call_start="<|tool_call_start|>", + tool_call_end="<|tool_call_end|>", + thought_start="<|thought_start|>", + thought_end="<|thought_end|>", +): + if thought_end in sequence and thought_start in sequence: + thought_string, sequence = sequence.rsplit(thought_end, 1) + thought_string = thought_string.split(thought_start, 1)[1] + else: + thought_string = "" + if tool_call_start in sequence and tool_call_end in sequence: + tool_call_string, content = sequence.rsplit(tool_call_end, 1) + tool_call_string = tool_call_string.split(tool_call_start, 1)[1] + try: + tool_calls = [] + tool_call_string = tool_call_string.strip() + if tool_call_string.startswith("```"): + tool_call_string = tool_call_string.lstrip("```").strip() + if tool_call_string.startswith("python"): + tool_call_string = tool_call_string.lstrip("python").strip() + if tool_call_string.endswith("```"): + tool_call_string = tool_call_string.rstrip("```").strip() + + parsed = ast.parse(tool_call_string) + + for elem in parsed.body: + assert isinstance(elem.value, ast.Call) + calls = resolve_ast_call(elem.value) + + for func_name, func_args in calls.items(): + + this_one = {"name": func_name, "arguments": func_args} + tool_calls.append(this_one) + + return { + "content": content.strip(), + "tool_calls": tool_calls, + "role": "assistant", + } + except: + return { + "content": content.strip(), + "role": "assistant", + "thought": thought_string, + } + else: + return { + "content": sequence.strip(), + "role": "assistant", + "thought": thought_string, + } diff --git a/berkeley-function-call-leaderboard/pyproject.toml b/berkeley-function-call-leaderboard/pyproject.toml index 4b2e803fe..d486118b4 100644 --- a/berkeley-function-call-leaderboard/pyproject.toml +++ b/berkeley-function-call-leaderboard/pyproject.toml @@ -28,10 +28,11 @@ dependencies = [ "cohere==5.5.8", "typer>=0.12.5", "tabulate>=0.9.0", + "datamodel-code-generator==0.25.7", "google-cloud-aiplatform==1.72.0", "mpmath==1.3.0", "tenacity==9.0.0", - "overrides==7.7.0" + "overrides" ] [project.scripts] From 6742baf32545509ab086ae742fcd6a15b574c50e Mon Sep 17 00:00:00 2001 From: "Huanzhi (Hans) Mao" Date: Tue, 26 Nov 2024 14:32:58 -0800 Subject: [PATCH 3/3] add final to base_handler; clean up --- .../bfcl/model_handler/base_handler.py | 7 +++++++ .../model_handler/oss_model/base_oss_handler.py | 5 +++-- .../bfcl/model_handler/oss_model/deepseek_coder.py | 3 +-- .../bfcl/model_handler/oss_model/gemma.py | 6 +++--- .../bfcl/model_handler/oss_model/glaive.py | 4 +++- 
.../bfcl/model_handler/oss_model/glm.py | 2 +- .../bfcl/model_handler/oss_model/granite.py | 3 ++- .../bfcl/model_handler/oss_model/hammer.py | 2 +- .../bfcl/model_handler/oss_model/hermes.py | 13 ++++++++----- .../bfcl/model_handler/oss_model/llama.py | 1 + .../bfcl/model_handler/oss_model/llama_fc.py | 3 --- .../bfcl/model_handler/oss_model/minicpm.py | 2 +- .../bfcl/model_handler/oss_model/phi.py | 3 ++- .../bfcl/model_handler/oss_model/qwen.py | 1 - .../bfcl/model_handler/oss_model/salesforce.py | 7 ++++--- 15 files changed, 37 insertions(+), 25 deletions(-) diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/base_handler.py b/berkeley-function-call-leaderboard/bfcl/model_handler/base_handler.py index d56d449ac..a9b251632 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/base_handler.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/base_handler.py @@ -15,6 +15,7 @@ ) from bfcl.model_handler.model_style import ModelStyle from bfcl.utils import load_file, make_json_serializable, sort_key +from overrides import final class BaseHandler: @@ -31,6 +32,7 @@ def __init__(self, model_name, temperature) -> None: self.temperature = temperature self.is_fc_model = False # Whether the model is a function calling model + @final def inference(self, test_entry: dict, include_input_log: bool, include_state_log: bool): # This method is used to retrive model response for each model. @@ -52,6 +54,7 @@ def inference(self, test_entry: dict, include_input_log: bool, include_state_log test_entry, include_input_log ) + @final def inference_multi_turn_FC( self, test_entry: dict, include_input_log: bool, include_state_log: bool ) -> tuple[list[list], dict]: @@ -296,6 +299,7 @@ def inference_multi_turn_FC( return all_model_response, metadata + @final def inference_multi_turn_prompting( self, test_entry: dict, include_input_log: bool, include_state_log: bool ) -> tuple[list[list], dict]: @@ -537,6 +541,7 @@ def inference_multi_turn_prompting( return all_model_response, metadata + @final def inference_single_turn_FC( self, test_entry: dict, include_input_log: bool ) -> tuple[any, dict]: @@ -569,6 +574,7 @@ def inference_single_turn_FC( return model_response_data["model_responses"], metadata + @final def inference_single_turn_prompting( self, test_entry: dict, include_input_log: bool ) -> tuple[any, dict]: @@ -607,6 +613,7 @@ def decode_execute(self, result): # This method takes raw model output and convert it to standard execute checker input. 
         raise NotImplementedError
 
+    @final
     def write(self, result, result_dir, update_mode=False):
         model_name_dir = self.model_name.replace("/", "_")
         model_result_dir = result_dir / model_name_dir
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py
index c16c98c9b..90e67fdbe 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py
@@ -1,9 +1,7 @@
 import subprocess
 import threading
 import time
-import json
 from concurrent.futures import ThreadPoolExecutor
-from overrides import EnforceOverrides, final
 
 import requests
 from bfcl.constant import RESULT_PATH, VERSION_PREFIX
@@ -17,6 +15,7 @@
     system_prompt_pre_processing_chat_model,
 )
 from openai import OpenAI
+from overrides import EnforceOverrides, final
 from tqdm import tqdm
 
 
@@ -28,6 +27,7 @@ def __init__(self, model_name, temperature, dtype="bfloat16") -> None:
         self.dtype = dtype
         self.client = OpenAI(base_url=f"http://localhost:{VLLM_PORT}/v1", api_key="EMPTY")
 
+    @final
     def inference(self, test_entry: dict, include_input_log: bool, include_state_log: bool):
         """
         OSS models have a different inference method.
@@ -252,6 +252,7 @@ def _multi_threaded_inference(self, test_case, include_input_log: bool, include_
         return result_to_write
 
     #### Prompting methods ####
+
     def _format_prompt(self, messages, function):
         raise NotImplementedError(
             "OSS Models should implement their own prompt formatting."
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/deepseek_coder.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/deepseek_coder.py
index 58c99e7bb..d0dacb4bf 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/deepseek_coder.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/deepseek_coder.py
@@ -1,8 +1,6 @@
 import json
 import re
 
-from overrides import overrides
-
 from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler
 from bfcl.model_handler.utils import (
     combine_consecutive_user_prompts,
@@ -10,6 +8,7 @@
     convert_to_function_call,
     func_doc_language_specific_pre_processing,
 )
+from overrides import overrides
 
 
 class DeepseekCoderHandler(OSSHandler):
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/gemma.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/gemma.py
index 63a1b3175..e4f2e1cf5 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/gemma.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/gemma.py
@@ -1,13 +1,13 @@
 from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler
 from bfcl.model_handler.utils import (
+    combine_consecutive_user_prompts,
+    convert_system_prompt_into_user_prompt,
     func_doc_language_specific_pre_processing,
     system_prompt_pre_processing_chat_model,
-    convert_system_prompt_into_user_prompt,
-    combine_consecutive_user_prompts,
 )
-
 from overrides import overrides
 
+
 class GemmaHandler(OSSHandler):
     def __init__(self, model_name, temperature) -> None:
         super().__init__(model_name, temperature)
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glaive.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glaive.py
index cd0ee7cde..f44e3f0c5 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glaive.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glaive.py
@@ -1,8 +1,10 @@
+import json
+
 from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler
 from bfcl.model_handler.utils import convert_to_function_call
-import json
 from overrides import overrides
 
+
 class GlaiveHandler(OSSHandler):
     def __init__(self, model_name, temperature) -> None:
         super().__init__(model_name, temperature)
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glm.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glm.py
index 7cb67fbe5..d1f6c4eac 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glm.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glm.py
@@ -8,9 +8,9 @@
     convert_to_tool,
     func_doc_language_specific_pre_processing,
 )
-
 from overrides import overrides
 
+
 class GLMHandler(OSSHandler):
     def __init__(self, model_name, temperature) -> None:
         super().__init__(model_name, temperature)
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/granite.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/granite.py
index 7c4daa357..161d5177a 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/granite.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/granite.py
@@ -7,8 +7,9 @@
     convert_to_tool,
     func_doc_language_specific_pre_processing,
 )
-
 from overrides import overrides
+
+
 class GraniteHandler(OSSHandler):
     def __init__(self, model_name, temperature) -> None:
         super().__init__(model_name, temperature)
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hammer.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hammer.py
index 4df3454e3..2b4315dc3 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hammer.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hammer.py
@@ -5,8 +5,8 @@
     convert_system_prompt_into_user_prompt,
     func_doc_language_specific_pre_processing,
 )
-
 from overrides import overrides
+
 TASK_INSTRUCTION = """You are a tool calling assistant. In order to complete the user's request, you need to select one or more appropriate tools from the following tools and fill in the correct values for the tool parameters. Your specific tasks are:
 1. Make one or more function/tool calls to meet the request based on the question.
 2. If none of the function can be used, point it out and refuse to answer.
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hermes.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hermes.py
index 750663dbf..05822bf2c 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hermes.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hermes.py
@@ -1,11 +1,14 @@
-from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler
-from bfcl.model_handler.utils import convert_to_tool, func_doc_language_specific_pre_processing
+import inspect
+import json
+
 from bfcl.model_handler.constant import GORILLA_TO_OPENAPI
 from bfcl.model_handler.model_style import ModelStyle
-
+from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler
+from bfcl.model_handler.utils import (
+    convert_to_tool,
+    func_doc_language_specific_pre_processing,
+)
 from overrides import overrides
-import json
-import inspect
 
 
 class HermesHandler(OSSHandler):
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama.py
index c793419a6..373e110e8 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama.py
@@ -1,6 +1,7 @@
 from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler
 from overrides import overrides
 
+
 # Note: This is the handler for the Llama models in prompring mode.
 # For function call mode, use LlamaFCHandler instead.
 # Llama 3 series are benchmarked in prompting mode while the Llama 3.1 series are benchmarked in function call mode.
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama_fc.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama_fc.py
index d6821b213..2c4d48c52 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama_fc.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/llama_fc.py
@@ -2,11 +2,8 @@
 from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler
 from bfcl.model_handler.utils import func_doc_language_specific_pre_processing
 
-
 from overrides import overrides
 
-# TODO: Merge with LlamaHandler
-
 
 class LlamaFCHandler(OSSHandler):
     def __init__(self, model_name, temperature) -> None:
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py
index 7d08e67a7..97a2f0ca1 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py
@@ -1,7 +1,7 @@
 from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler
-
 from overrides import overrides
 
+
 class MiniCPMHandler(OSSHandler):
     def __init__(self, model_name, temperature) -> None:
         super().__init__(model_name, temperature)
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/phi.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/phi.py
index 4b75912dd..236757497 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/phi.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/phi.py
@@ -5,8 +5,9 @@
     func_doc_language_specific_pre_processing,
     system_prompt_pre_processing_chat_model,
 )
-
 from overrides import overrides
+
+
 class PhiHandler(OSSHandler):
     def __init__(self, model_name, temperature) -> None:
         super().__init__(model_name, temperature)
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py
index 5b40a932c..a8786c2b4 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py
@@ -1,5 +1,4 @@
 from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler
-
 from overrides import overrides
 
 
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/salesforce.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/salesforce.py
index 17b065695..2feef25e6 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/salesforce.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/salesforce.py
@@ -1,12 +1,12 @@
 import json
 
 from bfcl.model_handler.model_style import ModelStyle
-from bfcl.model_handler.oss_model.constant import VLLM_PORT
 from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler
+from bfcl.model_handler.oss_model.constant import VLLM_PORT
 from openai import OpenAI
-
 from overrides import overrides
 
+
 class SalesforceHandler(OSSHandler):
     def __init__(self, model_name, temperature) -> None:
         super().__init__(model_name, temperature)
@@ -78,7 +78,8 @@ def xlam_json_to_python_tool_calls(tool_calls):
 
         return python_format
 
-    def convert_to_dict(self, input_str):
+    @staticmethod
+    def convert_to_dict(input_str):
         """
         Convert a JSON-formatted string into a dictionary of tool calls and their arguments.